/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

vect_free_slp_tree (slp_tree node)

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();
/* Free the memory allocated for the SLP instance.  */

vect_free_slp_instance (slp_instance instance)

  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
/* Create an SLP node for SCALAR_STMTS.  */

vect_create_new_slp_node (vec<gimple *> scalar_stmts)

  gimple *stmt = scalar_stmts[0];

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */

static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)

  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }
/* Free operands info.  */

vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)

  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)

  gimple *next_stmt = first_stmt;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))

      if (next_stmt == stmt)

      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));

      result += GROUP_GAP (vinfo_for_stmt (next_stmt));
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)

  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))

      number_of_oprnds = gimple_call_num_args (stmt);

  else if (is_gimple_assign (stmt))

      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))

          first_op_cond = true;

      commutative = commutative_tree_code (code);
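
  /* A non-zero *SWAP on entry is only expected for COND_EXPRs, where it
     encodes how the embedded comparison has to be reordered; the assert
     below checks that invariant.  */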
  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)

      /* Map indicating how operands of cond_expr should be swapped.  */
      int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
      int *map = maps[*swap];

          oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);

          oprnd = gimple_op (stmt, map[i]);

        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         same pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))

          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");

          switch (gimple_code (def_stmt))

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");

        oprnd_info->second_pattern = pattern;

          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);

          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,

              /* Try swapping operands if we got a mismatch.  */

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

      /* Check the types of the definitions.  */

        case vect_constant_def:
        case vect_external_def:

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);

          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)

          if (dump_enabled_p ())

              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));

              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);

        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, unsigned int *max_nunits)

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)

      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)

  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)

          if (dump_enabled_p ())

              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          /* Fatal mismatch.  */

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,

          /* Fatal mismatch.  */

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))

          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))

              if (dump_enabled_p ())

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

              /* Fatal mismatch.  */

        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */

          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,

                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)

                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */

                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */

                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))

                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);

          else if (rhs_code == WIDEN_LSHIFT_EXPR)

              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
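
          /* Remember the first operation code that differs from
             FIRST_STMT_CODE; a node can use at most one such alternate
             code (e.g. a mix of plus and minus).  */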
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))

              if (dump_enabled_p ())

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))

              if (dump_enabled_p ())

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          if (rhs_code == CALL_EXPR)

              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))

                  if (dump_enabled_p ())

                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))

          if (REFERENCE_CLASS_P (lhs))

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));

                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)

                      if (dump_enabled_p ())

                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

                prev_first_load = first_load;

        } /* Grouped access.  */

          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)

              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */

          if (rhs_code == COND_EXPR)

              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)

                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);

              if (first_cond_code == cond_code)

              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)

              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)

                  if (dump_enabled_p ())

                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

  for (i = 0; i < group_size; ++i)

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)

      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)

          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)

          sel.quick_push (elt);

      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))

          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)

                if (dump_enabled_p ())

                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

      *two_operators = true;
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */

  typedef vec<gimple *> value_type;
  typedef vec<gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
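
/* Hash a stmt set by combining the UIDs of its elements.  */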
bst_traits::hash (value_type x)

  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));

bst_traits::equal (value_type existing, value_type candidate)

  if (existing.length () != candidate.length ())

  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
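
/* Set of stmt vectors for which SLP tree construction already failed;
   consulted by vect_build_slp_tree below to avoid re-analyzing the same
   set of stmts over and over.  */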
static hash_set <vec <gimple *>, bst_traits> *bst_fail;

vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);
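
/* Wrapper around vect_build_slp_tree_2 that records stmt sets for which
   the build failed in BST_FAIL, so later attempts on the same stmts can
   bail out immediately.  */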
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)

  if (bst_fail->contains (stmts))

  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,

  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */

      x.create (stmts.length ());
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)

  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)

      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)

          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])

      /* Else def types have to match.  */
      FOR_EACH_VEC_ELT (stmts, i, stmt)

          /* But for reduction chains only check on the first stmt.  */
          if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
              && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)

          if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)

      node = vect_create_new_slp_node (stmts);

  bool two_operators = false;
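  /* Per-stmt operand-swap annotations, filled in by vect_build_slp_tree_1
     and consumed when the operand defs are collected below.  */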
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))

      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)

      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);

          matches[(res == -1) ? 0 : i] = false;

      for (i = 0; i < group_size; ++i)

          vect_free_oprnd_info (oprnds_info);

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)

      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)

      if (++this_tree_size > max_tree_size)

          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        max_tree_size)) != NULL)

          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))

              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)

                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
      if (is_a <bb_vec_info> (vinfo)
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */

      /* A first scalar stmt mismatch signals a fatal mismatch.  */

          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of not successful permutes was not more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential

          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)

                  || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))

                if (dump_enabled_p ())

                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)

                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);

          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            max_tree_size)) != NULL)

              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)

                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);

              for (j = 0; j < group_size; ++j)

                  gimple *stmt = stmts[j];

                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))

                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);

              /* Verify we swap all duplicates or none.  */
              for (j = 0; j < group_size; ++j)

                  gimple *stmt = stmts[j];
                  gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))

                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)

                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);

      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

vect_mark_slp_stmts_relevant (slp_tree node)

  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
/* Rearrange the statements of NODE according to PERMUTATION.  */

vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)

  vec<gimple *> tmp_stmts;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
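  /* Build the permuted copy of the scalar stmts and install it in NODE.  */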
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)

  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())

  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)

      if (!load->load_permutation.exists ())

      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)

      if (lidx >= group_size)

      if (bitmap_bit_p (load_index, lidx))

      bitmap_set_bit (load_index, lidx);

  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)

      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();

        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

vect_supported_load_permutation_p (slp_instance slp_instn)

  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())

      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);

          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))

      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)

          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())

          bool subchain_p = true;

          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)

                && (next_load != load
                    || GROUP_GAP (vinfo_for_stmt (load)) != 1))

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));

            SLP_TREE_LOAD_PERMUTATION (node).release ();

              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
              unsigned k, maxk = 0;
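              /* Find the highest group element index referenced by the
                 load permutation.  */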
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)

              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");

              /* Verify the permutation can be generated.  */

              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))

                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   "unsupported load permutation\n");

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */

    = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                               (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
/* Find the last store in SLP INSTANCE.  */

vect_find_last_scalar_stmt_in_slp (slp_tree node)

  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost)

  stmt_vec_info stmt_info;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))

      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)

      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);

          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())

              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              vect_transform_slp_perm_load (node, vNULL, NULL,
                                            ncopies_for_cost, instance, true,
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);

                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
              bitmap_clear (perm);
              for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
                bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
              ncopies_for_cost = 0;
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)

                  if (i % nunits == 0)

                  if (bitmap_bit_p (perm, i))

              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                              + nunits - 1) / nunits);
              ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);

          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,

  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)

      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
                        (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
                         ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
                        (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
                         ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
         to be generated and thus the derived IVs.  */

      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)

      tree op = gimple_op (stmt, i);
      enum vect_def_type dt;
      if (!op || op == lhs)

      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))

          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
/* Compute the cost for the SLP instance INSTANCE.  */

vect_analyze_slp_cost (slp_instance instance, void *data)

  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))

      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))

          nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
                     (TREE_TYPE (gimple_assign_rhs1 (stmt))));

  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)

  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
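  /* Advance STMT so that it ends up on the last element of the first
     group.  */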
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2)) =
    GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

vect_analyze_slp_instance (vec_info *vinfo,
                           gimple *stmt, unsigned max_tree_size)

  slp_instance new_instance;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))

          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);

          gcc_assert (is_a <loop_vec_info> (vinfo));
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));

      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))

      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */

          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));

      /* Mark the first element of the reduction chain as reduction to properly
         transform the node.  In the reduction analysis phase only the last
         element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;

      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
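  /* Start from a fresh record of stmt sets for which SLP build failed.  */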
2040 bst_fail
= new hash_set
<vec
<gimple
*>, bst_traits
> ();
2041 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2042 &max_nunits
, &loads
, matches
, &npermutes
,
2043 NULL
, max_tree_size
);
2047 /* Calculate the unrolling factor based on the smallest type. */
2049 = least_common_multiple (max_nunits
, group_size
) / group_size
;
      if (unrolling_factor != 1
	  && is_a <bb_vec_info> (vinfo))
	{
	  if (max_nunits > group_size)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: store group "
			       "size not a multiple of the vector size "
			       "in basic block SLP\n");
	      vect_free_slp_tree (node);
	      loads.release ();
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / max_nunits * max_nunits] = false;
	  vect_free_slp_tree (node);
	  loads.release ();
	  return false;
	}
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
	{
	  vec<unsigned> load_permutation;
	  int j;
	  gimple *load, *first_stmt;
	  bool this_load_permuted = false;
	  load_permutation.create (group_size);
	  first_stmt = GROUP_FIRST_ELEMENT
	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
				 (load, first_stmt);
	      gcc_assert (load_place != -1);
	      if (load_place != j)
		this_load_permuted = true;
	      load_permutation.safe_push (load_place);
	    }
	  if (!this_load_permuted
	      /* The load requires permutation when unrolling exposes
		 a gap either because the group is larger than the SLP
		 group-size or because there is a gap between the groups.  */
	      && (unrolling_factor == 1
		  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
		      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
	    {
	      load_permutation.release ();
	      continue;
	    }
	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
	  loads_permuted = true;
	}
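      /* As an illustration (not from the original source): if the SLP tree
	 consumes the loads of a group in the order a[1], a[0], a[3], a[2],
	 the recorded load permutation is {1, 0, 3, 2} and loads_permuted is
	 set; loads used in interleaving order keep no permutation vector.  */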
      if (loads_permuted)
	{
	  if (!vect_supported_load_permutation_p (new_instance))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported load "
				   "permutation ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
				    TDF_SLIM, stmt, 0);
		}
	      vect_free_slp_instance (new_instance);
	      return false;
	    }
	}
      /* If the loads and stores can be handled with load/store-lanes
	 instructions do not generate this SLP instance.  */
      if (is_a <loop_vec_info> (vinfo)
	  && loads_permuted
	  && dr && vect_store_lanes_supported (vectype, group_size))
	{
	  FOR_EACH_VEC_ELT (loads, i, load_node)
	    {
	      gimple *first_stmt = GROUP_FIRST_ELEMENT
		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
	      /* Use SLP for strided accesses (or if we
		 can't load-lanes).  */
	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
		  || ! vect_load_lanes_supported
			(STMT_VINFO_VECTYPE (stmt_vinfo),
			 GROUP_SIZE (stmt_vinfo)))
		break;
	    }
	  if (i == loads.length ())
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Built SLP cancelled: can use "
				 "load/store-lanes\n");
	      vect_free_slp_instance (new_instance);
	      return false;
	    }
	}
      vinfo->slp_instances.safe_push (new_instance);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Final SLP tree for instance:\n");
	  vect_print_slp_tree (MSG_NOTE, vect_location, node);
	}

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  scalar_stmts.release ();
  loads.release ();
  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  if (is_a <bb_vec_info> (vinfo)
      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* We consider breaking the group only on VF boundaries from the existing
	 start.  */
      for (i = 0; i < group_size; i++)
	if (!matches[i]) break;

      if (i >= nunits && i < group_size)
	{
	  /* Split into two groups at the first vector boundary before i.  */
	  gcc_assert ((nunits & (nunits - 1)) == 0);
	  unsigned group1_size = i & ~(nunits - 1);
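	  /* A worked example (illustrative only): with nunits == 4 and the
	     first mismatch at i == 6, group1_size becomes 6 & ~3 == 4, so
	     the first four stores are retried as their own SLP instance and
	     the remainder is split off below.  */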
	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
	  /* If the first non-match was in the middle of a vector,
	     skip the rest of that vector.  */
	  if (group1_size < i)
	    {
	      i = group1_size + nunits;
	      if (i < group_size)
		rest = vect_split_slp_store_group (rest, nunits);
	    }
	  if (i < group_size)
	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
	  return res;
	}
      /* Even though the first vector did not all match, we might be able to SLP
	 (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
	{
	  /* Find SLP sequences starting from reduction chains.  */
	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
	    if (! vect_analyze_slp_instance (vinfo, first_element,
					     max_tree_size))
	      {
		/* Dissolve reduction chain group.  */
		gimple *next, *stmt = first_element;
		while (stmt)
		  {
		    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
		    next = GROUP_NEXT_ELEMENT (vinfo);
		    GROUP_FIRST_ELEMENT (vinfo) = NULL;
		    GROUP_NEXT_ELEMENT (vinfo) = NULL;
		    stmt = next;
		  }
		STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
		  = vect_internal_def;
	      }
	}

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
	vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
				   max_tree_size);
    }

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
		     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Decided to SLP %d instances. Unrolling factor %d\n",
		     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
2312 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2313 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2316 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2318 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2319 imm_use_iterator imm_iter
;
2321 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2323 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2324 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2327 /* Propagate hybrid down the SLP tree. */
2328 if (stype
== hybrid
)
2330 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2334 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2335 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2336 /* If we get a pattern stmt here we have to use the LHS of the
2337 original stmt for immediate uses. */
2338 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2339 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2340 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2342 if (gimple_code (stmt
) == GIMPLE_PHI
)
2343 def
= gimple_phi_result (stmt
);
2345 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2347 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2349 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2351 use_vinfo
= vinfo_for_stmt (use_stmt
);
2352 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2353 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2354 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2355 if (!STMT_SLP_TYPE (use_vinfo
)
2356 && (STMT_VINFO_RELEVANT (use_vinfo
)
2357 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2358 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2359 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2361 if (dump_enabled_p ())
2363 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2364 "def in non-SLP stmt: ");
2365 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2373 && !HYBRID_SLP_STMT (stmt_vinfo
))
2375 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2378 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2380 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2383 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2384 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2385 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2388 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2391 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2393 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2394 struct loop
*loopp
= (struct loop
*)wi
->info
;
2399 if (TREE_CODE (*tp
) == SSA_NAME
2400 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2402 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2403 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2404 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2406 if (dump_enabled_p ())
2408 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2409 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2411 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2419 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2422 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2423 /* If the stmt is in a SLP instance then this isn't a reason
2424 to mark use definitions in other SLP instances as hybrid. */
2425 if (! STMT_SLP_TYPE (use_vinfo
)
2426 && (STMT_VINFO_RELEVANT (use_vinfo
)
2427 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2428 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2429 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2436 /* Find stmts that must be both vectorized and SLPed. */
2439 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2442 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2443 slp_instance instance
;
2445 if (dump_enabled_p ())
2446 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2449 /* First walk all pattern stmt in the loop and mark defs of uses as
2450 hybrid because immediate uses in them are not recorded. */
2451 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2453 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2454 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2457 gimple
*stmt
= gsi_stmt (gsi
);
2458 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2459 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2462 memset (&wi
, 0, sizeof (wi
));
2463 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2464 gimple_stmt_iterator gsi2
2465 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2466 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2467 vect_detect_hybrid_slp_1
, &wi
);
2468 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2469 vect_detect_hybrid_slp_2
,
2470 vect_detect_hybrid_slp_1
, &wi
);
2475 /* Then walk the SLP instance trees marking stmts with uses in
2476 non-SLP stmts as hybrid, also propagating hybrid down the
2477 SLP tree, collecting the above info on-the-fly. */
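  /* For instance (illustrative only): if a stmt that belongs to an SLP
     instance has a use in a stmt that will only be loop-vectorized, the
     defining stmt is marked hybrid here so that it is also vectorized by
     the loop vectorizer and not just as part of the SLP tree.  */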
2478 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2480 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2481 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2487 /* Initialize a bb_vec_info struct for the statements between
2488 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2490 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2491 gimple_stmt_iterator region_end_in
)
2492 : vec_info (vec_info::bb
, init_cost (NULL
)),
2493 bb (gsi_bb (region_begin_in
)),
2494 region_begin (region_begin_in
),
2495 region_end (region_end_in
)
2497 gimple_stmt_iterator gsi
;
2499 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2502 gimple
*stmt
= gsi_stmt (gsi
);
2503 gimple_set_uid (stmt
, 0);
2504 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2511 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2512 stmts in the basic block. */
2514 _bb_vec_info::~_bb_vec_info ()
2516 for (gimple_stmt_iterator si
= region_begin
;
2517 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2519 gimple
*stmt
= gsi_stmt (si
);
2520 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2523 /* Free stmt_vec_info. */
2524 free_stmt_vec_info (stmt
);
2526 /* Reset region marker. */
2527 gimple_set_uid (stmt
, -1);
2534 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2535 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2537 Return true if the operations are supported. */
2540 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2541 slp_instance node_instance
)
2548 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2552 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
))
2555 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2556 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2557 gcc_assert (stmt_info
);
2558 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2560 /* For BB vectorization vector types are assigned here.
2561 Memory accesses already got their vector type assigned
2562 in vect_analyze_data_refs. */
2563 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2565 && ! STMT_VINFO_DATA_REF (stmt_info
))
2567 gcc_assert (PURE_SLP_STMT (stmt_info
));
2569 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2570 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_NOTE
, vect_location
,
2573 "get vectype for scalar type: ");
2574 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2575 dump_printf (MSG_NOTE
, "\n");
2578 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2581 if (dump_enabled_p ())
2583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2584 "not SLPed: unsupported data-type ");
2585 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2587 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2592 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2595 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2596 dump_printf (MSG_NOTE
, "\n");
2600 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2601 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2604 /* Calculate the number of vector statements to be created for the
2605 scalar stmts in this node. For SLP reductions it is equal to the
2606 number of vector statements in the children (which has already been
2607 calculated by the recursive call). Otherwise it is the number of
2608 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2609 VF divided by the number of elements in a vector. */
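  /* Illustrative numbers (not from the original source): with
     GROUP_SIZE == 2, a vectorization factor of 8 and 4 elements per vector,
     the node gets 2 * 8 / 4 == 4 vector statements.  */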
2610 if (GROUP_FIRST_ELEMENT (stmt_info
)
2611 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2612 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2613 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2617 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2618 vf
= loop_vinfo
->vectorization_factor
;
2621 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2622 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2623 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2624 = vf
* group_size
/ TYPE_VECTOR_SUBPARTS (vectype
);
2627 /* Push SLP node def-type to stmt operands. */
2628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2629 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2630 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2631 = SLP_TREE_DEF_TYPE (child
);
2632 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2633 /* Restore def-types. */
2634 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2635 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2636 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2637 = vect_internal_def
;
2645 /* Analyze statements in SLP instances of VINFO. Return true if the
2646 operations are supported. */
2649 vect_slp_analyze_operations (vec_info
*vinfo
)
2651 slp_instance instance
;
2654 if (dump_enabled_p ())
2655 dump_printf_loc (MSG_NOTE
, vect_location
,
2656 "=== vect_slp_analyze_operations ===\n");
2658 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2660 if (!vect_slp_analyze_node_operations (vinfo
,
2661 SLP_INSTANCE_TREE (instance
),
2664 dump_printf_loc (MSG_NOTE
, vect_location
,
2665 "removing SLP instance operations starting from: ");
2666 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2667 SLP_TREE_SCALAR_STMTS
2668 (SLP_INSTANCE_TREE (instance
))[0], 0);
2669 vect_free_slp_instance (instance
);
2670 vinfo
->slp_instances
.ordered_remove (i
);
2674 /* Compute the costs of the SLP instance. */
2675 vect_analyze_slp_cost (instance
, vinfo
->target_cost_data
);
2680 return !vinfo
->slp_instances
.is_empty ();
2684 /* Compute the scalar cost of the SLP node NODE and its children
2685 and return it. Do not account defs that are marked in LIFE and
2686 update LIFE according to uses of NODE. */
2689 vect_bb_slp_scalar_cost (basic_block bb
,
2690 slp_tree node
, vec
<bool, va_heap
> *life
)
2692 unsigned scalar_cost
= 0;
2697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2700 ssa_op_iter op_iter
;
2701 def_operand_p def_p
;
2702 stmt_vec_info stmt_info
;
      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
2712 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2714 imm_use_iterator use_iter
;
2716 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2717 if (!is_gimple_debug (use_stmt
)
2718 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2720 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2723 BREAK_FROM_IMM_USE_STMT (use_iter
);
2729 /* Count scalar stmts only once. */
2730 if (gimple_visited_p (stmt
))
2732 gimple_set_visited (stmt
, true);
2734 stmt_info
= vinfo_for_stmt (stmt
);
2735 if (STMT_VINFO_DATA_REF (stmt_info
))
2737 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2738 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2740 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2743 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2745 scalar_cost
+= stmt_cost
;
2748 auto_vec
<bool, 20> subtree_life
;
2749 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2751 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2753 /* Do not directly pass LIFE to the recursive call, copy it to
2754 confine changes in the callee to the current child/subtree. */
2755 subtree_life
.safe_splice (*life
);
2756 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
);
2757 subtree_life
.truncate (0);
2764 /* Check if vectorization of the basic block is profitable. */
2767 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2769 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2770 slp_instance instance
;
2772 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2773 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2775 /* Calculate scalar cost. */
2776 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2778 auto_vec
<bool, 20> life
;
2779 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2780 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2781 SLP_INSTANCE_TREE (instance
),
2785 /* Unset visited flag. */
2786 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2787 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2788 gimple_set_visited (gsi_stmt (gsi
), false);
2790 /* Complete the target-specific cost calculation. */
2791 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2792 &vec_inside_cost
, &vec_epilogue_cost
);
2794 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2796 if (dump_enabled_p ())
2798 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2799 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2801 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2802 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2803 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
  /* Vectorization is profitable if its cost is no greater than the cost of
     the scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side, for
     example).  */
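  /* For instance (illustrative numbers only): with vec_prologue_cost == 2,
     vec_epilogue_cost == 0 and vec_inside_cost == 6, the vector cost is 8,
     so the block is only vectorized if the scalar cost is at least 8.  */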
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
2817 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2818 if so and sets fatal to true if failure is independent of
2819 current_vector_size. */
2822 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2823 gimple_stmt_iterator region_end
,
2824 vec
<data_reference_p
> datarefs
, int n_stmts
,
2827 bb_vec_info bb_vinfo
;
2828 slp_instance instance
;
2832 /* The first group of checks is independent of the vector size. */
2835 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2839 "not vectorized: too many instructions in "
2841 free_data_refs (datarefs
);
2845 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
2849 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2851 /* Analyze the data references. */
2853 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2855 if (dump_enabled_p ())
2856 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2857 "not vectorized: unhandled data-ref in basic "
2864 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2868 "not vectorized: not enough data-refs in "
2875 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2877 if (dump_enabled_p ())
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2879 "not vectorized: unhandled data access in "
2886 /* If there are no grouped stores in the region there is no need
2887 to continue with pattern recog as vect_analyze_slp will fail
2889 if (bb_vinfo
->grouped_stores
.is_empty ())
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2893 "not vectorized: no grouped stores in "
2900 /* While the rest of the analysis below depends on it in some way. */
2903 vect_pattern_recog (bb_vinfo
);
2905 /* Check the SLP opportunities in the basic block, analyze and build SLP
2907 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2909 if (dump_enabled_p ())
2911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2912 "Failed to SLP the basic block.\n");
2913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2914 "not vectorized: failed to find SLP opportunities "
2915 "in basic block.\n");
2922 vect_record_base_alignments (bb_vinfo
);
2924 /* Analyze and verify the alignment of data references and the
2925 dependence in the SLP instances. */
2926 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2928 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2929 || ! vect_slp_analyze_instance_dependence (instance
))
2931 dump_printf_loc (MSG_NOTE
, vect_location
,
2932 "removing SLP instance operations starting from: ");
2933 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2934 SLP_TREE_SCALAR_STMTS
2935 (SLP_INSTANCE_TREE (instance
))[0], 0);
2936 vect_free_slp_instance (instance
);
2937 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2941 /* Mark all the statements that we want to vectorize as pure SLP and
2943 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2944 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2948 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
2954 if (!vect_slp_analyze_operations (bb_vinfo
))
2956 if (dump_enabled_p ())
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2958 "not vectorized: bad operation in basic block.\n");
2964 /* Cost model: check if the vectorization is worthwhile. */
2965 if (!unlimited_cost_model (NULL
)
2966 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2968 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2970 "not vectorized: vectorization is not "
2977 if (dump_enabled_p ())
2978 dump_printf_loc (MSG_NOTE
, vect_location
,
2979 "Basic block will be vectorized using SLP\n");
2985 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2986 true if anything in the basic-block was vectorized. */
2989 vect_slp_bb (basic_block bb
)
2991 bb_vec_info bb_vinfo
;
2992 gimple_stmt_iterator gsi
;
2993 unsigned int vector_sizes
;
2994 bool any_vectorized
= false;
2996 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2999 /* Autodetect first vector size we try. */
3000 current_vector_size
= 0;
3001 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
3003 gsi
= gsi_start_bb (bb
);
3007 if (gsi_end_p (gsi
))
3010 gimple_stmt_iterator region_begin
= gsi
;
3011 vec
<data_reference_p
> datarefs
= vNULL
;
3014 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3016 gimple
*stmt
= gsi_stmt (gsi
);
3017 if (is_gimple_debug (stmt
))
3021 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3022 vect_location
= gimple_location (stmt
);
3024 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
3028 /* Skip leading unhandled stmts. */
3029 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3035 gimple_stmt_iterator region_end
= gsi
;
3037 bool vectorized
= false;
3039 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3040 datarefs
, insns
, fatal
);
3042 && dbg_cnt (vect_slp
))
3044 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3047 vect_schedule_slp (bb_vinfo
);
3049 if (dump_enabled_p ())
3050 dump_printf_loc (MSG_NOTE
, vect_location
,
3051 "basic block part vectorized\n");
3057 any_vectorized
|= vectorized
;
3059 vector_sizes
&= ~current_vector_size
;
3061 || vector_sizes
== 0
3062 || current_vector_size
== 0
3063 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3064 vector sizes will fail do not bother iterating. */
3067 if (gsi_end_p (region_end
))
3070 /* Skip the unhandled stmt. */
3073 /* And reset vector sizes. */
3074 current_vector_size
= 0;
3075 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
3079 /* Try the next biggest vector size. */
3080 current_vector_size
= 1 << floor_log2 (vector_sizes
);
3081 if (dump_enabled_p ())
3082 dump_printf_loc (MSG_NOTE
, vect_location
,
3083 "***** Re-trying analysis with "
3084 "vector size %d\n", current_vector_size
);
3091 return any_vectorized
;
/* Return true if the vector type of the boolean constant that is the OPNUM
   operand in statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
3101 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3102 enum tree_code code
= gimple_expr_code (stmt
);
3105 enum vect_def_type dt
;
3107 /* For comparison and COND_EXPR type is chosen depending
3108 on the other comparison operand. */
3109 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3112 op
= gimple_assign_rhs1 (stmt
);
3114 op
= gimple_assign_rhs2 (stmt
);
3116 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3120 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3123 if (code
== COND_EXPR
)
3125 tree cond
= gimple_assign_rhs1 (stmt
);
3127 if (TREE_CODE (cond
) == SSA_NAME
)
3130 op
= TREE_OPERAND (cond
, 1);
3132 op
= TREE_OPERAND (cond
, 0);
3134 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3138 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3141 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3145 /* For constant and loop invariant defs of SLP_NODE this function returns
3146 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3147 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3148 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3149 REDUC_INDEX is the index of the reduction operand in the statements, unless
3153 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3154 vec
<tree
> *vec_oprnds
,
3155 unsigned int op_num
, unsigned int number_of_vectors
)
3157 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3158 gimple
*stmt
= stmts
[0];
3159 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3162 unsigned j
, number_of_places_left_in_vector
;
3165 int group_size
= stmts
.length ();
3166 unsigned int vec_num
, i
;
3167 unsigned number_of_copies
= 1;
3169 voprnds
.create (number_of_vectors
);
3170 bool constant_p
, is_store
;
3171 tree neutral_op
= NULL
;
3172 enum tree_code code
= gimple_expr_code (stmt
);
3173 gimple_seq ctor_seq
= NULL
;
3175 /* Check if vector type is a boolean vector. */
3176 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3177 && vect_mask_constant_operand_p (stmt
, op_num
))
3179 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3181 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3182 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
3184 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3187 op
= gimple_assign_rhs1 (stmt
);
3194 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3195 created vectors. It is greater than 1 if unrolling is performed.
3197 For example, we have two scalar operands, s1 and s2 (e.g., group of
3198 strided accesses of size two), while NUNITS is four (i.e., four scalars
3199 of this type can be packed in a vector). The output vector will contain
3200 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3203 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3204 containing the operands.
3206 For example, NUNITS is four as before, and the group size is 8
3207 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3208 {s5, s6, s7, s8}. */
  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
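  /* Illustrative numbers (not from the original source): for the example in
     the comment above, group_size == 2, nunits == 4 and
     number_of_vectors == 1 give number_of_copies == 2, so each scalar
     operand appears twice in the built vector {s1, s2, s1, s2}.  */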
3214 auto_vec
<tree
, 32> elts (nunits
);
3215 elts
.quick_grow (nunits
);
3216 bool place_after_defs
= false;
3217 for (j
= 0; j
< number_of_copies
; j
++)
3219 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3222 op
= gimple_assign_rhs1 (stmt
);
3229 tree cond
= gimple_assign_rhs1 (stmt
);
3230 if (TREE_CODE (cond
) == SSA_NAME
)
3231 op
= gimple_op (stmt
, op_num
+ 1);
3232 else if (op_num
== 0 || op_num
== 1)
3233 op
= TREE_OPERAND (cond
, op_num
);
3237 op
= gimple_assign_rhs2 (stmt
);
3239 op
= gimple_assign_rhs3 (stmt
);
3245 op
= gimple_call_arg (stmt
, op_num
);
3252 op
= gimple_op (stmt
, op_num
+ 1);
3253 /* Unlike the other binary operators, shifts/rotates have
3254 the shift count being int, instead of the same type as
3255 the lhs, so make sure the scalar is the right type if
3256 we are dealing with vectors of
3257 long long/long/short/char. */
3258 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3259 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3263 op
= gimple_op (stmt
, op_num
+ 1);
3268 /* Create 'vect_ = {op0,op1,...,opn}'. */
3269 number_of_places_left_in_vector
--;
3271 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3273 if (CONSTANT_CLASS_P (op
))
3275 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3277 /* Can't use VIEW_CONVERT_EXPR for booleans because
3278 of possibly different sizes of scalar value and
3280 if (integer_zerop (op
))
3281 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3282 else if (integer_onep (op
))
3283 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3288 op
= fold_unary (VIEW_CONVERT_EXPR
,
3289 TREE_TYPE (vector_type
), op
);
3290 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3294 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3296 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3299 = build_all_ones_cst (TREE_TYPE (vector_type
));
3301 = build_zero_cst (TREE_TYPE (vector_type
));
3302 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3303 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3309 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3312 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3315 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3319 elts
[number_of_places_left_in_vector
] = op
;
3320 if (!CONSTANT_CLASS_P (op
))
3322 if (TREE_CODE (orig_op
) == SSA_NAME
3323 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3324 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3325 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3326 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3327 place_after_defs
= true;
3329 if (number_of_places_left_in_vector
== 0)
3332 vec_cst
= build_vector (vector_type
, elts
);
3335 vec
<constructor_elt
, va_gc
> *v
;
3337 vec_alloc (v
, nunits
);
3338 for (k
= 0; k
< nunits
; ++k
)
3339 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
3340 vec_cst
= build_constructor (vector_type
, v
);
3343 gimple_stmt_iterator gsi
;
3344 if (place_after_defs
)
3347 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3348 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3351 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3352 if (ctor_seq
!= NULL
)
3354 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3355 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
3359 voprnds
.quick_push (init
);
3360 place_after_defs
= false;
3361 number_of_places_left_in_vector
= nunits
;
3367 /* Since the vectors are created in the reverse order, we should invert
3369 vec_num
= voprnds
.length ();
3370 for (j
= vec_num
; j
!= 0; j
--)
3372 vop
= voprnds
[j
- 1];
3373 vec_oprnds
->quick_push (vop
);
3378 /* In case that VF is greater than the unrolling factor needed for the SLP
3379 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3380 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3381 to replicate the vectors. */
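  /* For example (illustrative only): if four vector defs are requested but
     only two distinct ones were built above, the loop below pushes either
     the neutral-value vector (when one exists) or the already created
     vectors again until the requested number is reached.  */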
  while (number_of_vectors > vec_oprnds->length ())
3384 tree neutral_vec
= NULL
;
3389 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3391 vec_oprnds
->quick_push (neutral_vec
);
3395 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3396 vec_oprnds
->quick_push (vop
);
3402 /* Get vectorized definitions from SLP_NODE that contains corresponding
3403 vectorized def-stmts. */
3406 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3409 gimple
*vec_def_stmt
;
3412 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3414 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3416 gcc_assert (vec_def_stmt
);
3417 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3418 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3420 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3421 vec_oprnds
->quick_push (vec_oprnd
);
3426 /* Get vectorized definitions for SLP_NODE.
3427 If the scalar definitions are loop invariants or constants, collect them and
3428 call vect_get_constant_vectors() to create vector stmts.
3429 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3430 must be stored in the corresponding child of SLP_NODE, and we call
3431 vect_get_slp_vect_defs () to retrieve them. */
3434 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3435 vec
<vec
<tree
> > *vec_oprnds
)
3438 int number_of_vects
= 0, i
;
3439 unsigned int child_index
= 0;
3440 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3441 slp_tree child
= NULL
;
3444 bool vectorized_defs
;
3446 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3447 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3449 /* For each operand we check if it has vectorized definitions in a child
3450 node or we need to create them (for invariants and constants). We
3451 check if the LHS of the first stmt of the next child matches OPRND.
3452 If it does, we found the correct child. Otherwise, we call
3453 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3454 to check this child node for the next operand. */
3455 vectorized_defs
= false;
3456 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3458 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3460 /* We have to check both pattern and original def, if available. */
3461 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3463 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3465 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3468 if (gimple_code (first_def
) == GIMPLE_PHI
)
3469 first_def_op
= gimple_phi_result (first_def
);
3471 first_def_op
= gimple_get_lhs (first_def
);
3472 if (operand_equal_p (oprnd
, first_def_op
, 0)
3474 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3476 /* The number of vector defs is determined by the number of
3477 vector statements in the node from which we get those
3479 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3480 vectorized_defs
= true;
3488 if (!vectorized_defs
)
3492 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3493 /* Number of vector stmts was calculated according to LHS in
3494 vect_schedule_slp_instance (), fix it by replacing LHS with
3495 RHS, if necessary. See vect_get_smallest_scalar_type () for
3497 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3499 if (rhs_size_unit
!= lhs_size_unit
)
3501 number_of_vects
*= rhs_size_unit
;
3502 number_of_vects
/= lhs_size_unit
;
3507 /* Allocate memory for vectorized defs. */
3509 vec_defs
.create (number_of_vects
);
3511 /* For reduction defs we call vect_get_constant_vectors (), since we are
3512 looking for initial loop invariant values. */
3513 if (vectorized_defs
)
3514 /* The defs are already vectorized. */
3515 vect_get_slp_vect_defs (child
, &vec_defs
);
3517 /* Build vectors from scalar defs. */
3518 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3521 vec_oprnds
->quick_push (vec_defs
);
3525 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3526 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3527 permute statements for the SLP node NODE of the SLP instance
3528 SLP_NODE_INSTANCE. */
3531 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3532 gimple_stmt_iterator
*gsi
, int vf
,
3533 slp_instance slp_node_instance
, bool analyze_only
,
3536 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3537 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3538 tree mask_element_type
= NULL_TREE
, mask_type
;
3539 int nunits
, vec_index
= 0;
3540 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3541 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3545 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3548 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3550 mode
= TYPE_MODE (vectype
);
3552 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3553 same size as the vector element being permuted. */
3554 mask_element_type
= lang_hooks
.types
.type_for_mode
3555 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3556 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3557 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3558 auto_vec_perm_indices
mask (nunits
);
3559 mask
.quick_grow (nunits
);
3561 /* Initialize the vect stmts of NODE to properly insert the generated
3564 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3565 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3566 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3568 /* Generate permutation masks for every NODE. Number of masks for each NODE
3569 is equal to GROUP_SIZE.
3570 E.g., we have a group of three nodes with three loads from the same
3571 location in each node, and the vector size is 4. I.e., we have a
3572 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3573 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3574 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3577 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3578 The last mask is illegal since we assume two operands for permute
3579 operation, and the mask element values can't be outside that range.
3580 Hence, the last mask must be converted into {2,5,5,5}.
3581 For the first two permutations we need the first and the second input
3582 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
   we need the second and the third vectors: {b1,c1,a2,b2} and
   {c2,a3,b3,c3}.  */
3586 int vect_stmts_counter
= 0;
3588 int first_vec_index
= -1;
3589 int second_vec_index
= -1;
3593 for (int j
= 0; j
< vf
; j
++)
3595 for (int k
= 0; k
< group_size
; k
++)
3597 int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3598 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3599 vec_index
= i
/ nunits
;
3600 mask_element
= i
% nunits
;
3601 if (vec_index
== first_vec_index
3602 || first_vec_index
== -1)
3604 first_vec_index
= vec_index
;
3606 else if (vec_index
== second_vec_index
3607 || second_vec_index
== -1)
3609 second_vec_index
= vec_index
;
3610 mask_element
+= nunits
;
3614 if (dump_enabled_p ())
3616 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3617 "permutation requires at "
3618 "least three vectors ");
3619 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3622 gcc_assert (analyze_only
);
3626 gcc_assert (mask_element
>= 0
3627 && mask_element
< 2 * nunits
);
3628 if (mask_element
!= index
)
3630 mask
[index
++] = mask_element
;
3632 if (index
== nunits
)
3635 && ! can_vec_perm_p (mode
, false, &mask
))
3637 if (dump_enabled_p ())
3639 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3641 "unsupported vect permute { ");
3642 for (i
= 0; i
< nunits
; ++i
)
3643 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ", mask
[i
]);
3644 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3646 gcc_assert (analyze_only
);
3655 tree mask_vec
= NULL_TREE
;
3659 auto_vec
<tree
, 32> mask_elts (nunits
);
3660 for (int l
= 0; l
< nunits
; ++l
)
3661 mask_elts
.quick_push (build_int_cst (mask_element_type
,
3663 mask_vec
= build_vector (mask_type
, mask_elts
);
3666 if (second_vec_index
== -1)
3667 second_vec_index
= first_vec_index
;
3669 /* Generate the permute statement if necessary. */
3670 tree first_vec
= dr_chain
[first_vec_index
];
3671 tree second_vec
= dr_chain
[second_vec_index
];
3676 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3678 perm_dest
= make_ssa_name (perm_dest
);
3679 perm_stmt
= gimple_build_assign (perm_dest
,
3681 first_vec
, second_vec
,
3683 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3686 /* If mask was NULL_TREE generate the requested
3687 identity transform. */
3688 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3690 /* Store the vector statement in NODE. */
3691 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3695 first_vec_index
= -1;
3696 second_vec_index
= -1;
3707 /* Vectorize SLP instance tree in postorder. */
3710 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
)
3713 bool grouped_store
, is_store
;
3714 gimple_stmt_iterator si
;
3715 stmt_vec_info stmt_info
;
3716 unsigned int group_size
;
3721 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3724 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3725 vect_schedule_slp_instance (child
, instance
);
3727 /* Push SLP node def-type to stmts. */
3728 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3729 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3730 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3731 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
3733 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3734 stmt_info
= vinfo_for_stmt (stmt
);
3736 /* VECTYPE is the type of the destination. */
3737 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3738 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3740 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3741 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3743 if (dump_enabled_p ())
3745 dump_printf_loc (MSG_NOTE
,vect_location
,
3746 "------>vectorizing SLP node starting from: ");
3747 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3750 /* Vectorized stmts go before the last scalar stmt which is where
3751 all uses are ready. */
3752 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3754 /* Mark the first element of the reduction chain as reduction to properly
3755 transform the node. In the analysis phase only the last element of the
3756 chain is marked as reduction. */
3757 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3758 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3760 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3761 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3764 /* Handle two-operation SLP nodes by vectorizing the group with
3765 both operations and then performing a merge. */
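  /* A typical case (illustrative only): a group alternating
     a[0] + b[0], a[1] - b[1], a[2] + b[2], a[3] - b[3] is vectorized once
     as an addition and once as a subtraction, and the two result vectors
     are then blended with a VEC_PERM_EXPR that takes the lanes using the
     first operation from the first result and the remaining lanes from
     the second.  */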
3766 if (SLP_TREE_TWO_OPERATORS (node
))
3768 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3769 enum tree_code ocode
= ERROR_MARK
;
3771 auto_vec_perm_indices
mask (group_size
);
3772 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3773 if (gimple_assign_rhs_code (ostmt
) != code0
)
3775 mask
.quick_push (1);
3776 ocode
= gimple_assign_rhs_code (ostmt
);
3779 mask
.quick_push (0);
3780 if (ocode
!= ERROR_MARK
)
3785 tree tmask
= NULL_TREE
;
3786 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3787 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3788 SLP_TREE_VEC_STMTS (node
).truncate (0);
3789 gimple_assign_set_rhs_code (stmt
, ocode
);
3790 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3791 gimple_assign_set_rhs_code (stmt
, code0
);
3792 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3793 SLP_TREE_VEC_STMTS (node
).truncate (0);
3794 tree meltype
= build_nonstandard_integer_type
3795 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3796 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3798 for (j
= 0; j
< v0
.length (); ++j
)
3800 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3801 auto_vec
<tree
, 32> melts (nunits
);
3802 for (l
= 0; l
< nunits
; ++l
)
3804 if (k
>= group_size
)
3806 tree t
= build_int_cst (meltype
, mask
[k
++] * nunits
+ l
);
3807 melts
.quick_push (t
);
3809 tmask
= build_vector (mvectype
, melts
);
3811 /* ??? Not all targets support a VEC_PERM_EXPR with a
3812 constant mask that would translate to a vec_merge RTX
3813 (with their vec_perm_const_ok). We can either not
3814 vectorize in that case or let veclower do its job.
3815 Unfortunately that isn't too great and at least for
3816 plus/minus we'd eventually like to match targets
3817 vector addsub instructions. */
3819 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3821 gimple_assign_lhs (v0
[j
]),
3822 gimple_assign_lhs (v1
[j
]), tmask
);
3823 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3824 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3831 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3833 /* Restore stmt def-types. */
3834 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3835 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
3837 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
3842 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3843 For loop vectorization this is done in vectorizable_call, but for SLP
3844 it needs to be deferred until end of vect_schedule_slp, because multiple
3845 SLP instances may refer to the same scalar stmt. */
3848 vect_remove_slp_scalar_calls (slp_tree node
)
3850 gimple
*stmt
, *new_stmt
;
3851 gimple_stmt_iterator gsi
;
3855 stmt_vec_info stmt_info
;
3857 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3860 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3861 vect_remove_slp_scalar_calls (child
);
3863 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3865 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3867 stmt_info
= vinfo_for_stmt (stmt
);
3868 if (stmt_info
== NULL
3869 || is_pattern_stmt_p (stmt_info
)
3870 || !PURE_SLP_STMT (stmt_info
))
3872 lhs
= gimple_call_lhs (stmt
);
3873 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3874 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3875 set_vinfo_for_stmt (stmt
, NULL
);
3876 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3877 gsi
= gsi_for_stmt (stmt
);
3878 gsi_replace (&gsi
, new_stmt
, false);
3879 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3883 /* Generate vector code for all SLP instances in the loop/basic block. */
3886 vect_schedule_slp (vec_info
*vinfo
)
3888 vec
<slp_instance
> slp_instances
;
3889 slp_instance instance
;
3891 bool is_store
= false;
3893 slp_instances
= vinfo
->slp_instances
;
3894 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3896 /* Schedule the tree of INSTANCE. */
3897 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3899 if (dump_enabled_p ())
3900 dump_printf_loc (MSG_NOTE
, vect_location
,
3901 "vectorizing stmts using SLP.\n");
3904 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3906 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3909 gimple_stmt_iterator gsi
;
3911 /* Remove scalar call stmts. Do not do this for basic-block
3912 vectorization as not all uses may be vectorized.
3913 ??? Why should this be necessary? DCE should be able to
3914 remove the stmts itself.
3915 ??? For BB vectorization we can as well remove scalar
3916 stmts starting from the SLP tree root if they have no
3918 if (is_a
<loop_vec_info
> (vinfo
))
3919 vect_remove_slp_scalar_calls (root
);
3921 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3922 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3924 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3927 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3928 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3929 /* Free the attached stmt_vec_info and remove the stmt. */
3930 gsi
= gsi_for_stmt (store
);
3931 unlink_stmt_vdef (store
);
3932 gsi_remove (&gsi
, true);
3933 release_defs (store
);
3934 free_stmt_vec_info (store
);