/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

vect_free_slp_tree (slp_tree node)

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();
/* Free the memory allocated for the SLP instance.  */

vect_free_slp_instance (slp_instance instance)
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
/* Create an SLP node for SCALAR_STMTS.  */

vect_create_new_slp_node (vec<gimple *> scalar_stmts)
  gimple *stmt = scalar_stmts[0];

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
  else if (gimple_code (stmt) == GIMPLE_PHI)

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
/* Free operands info.  */

vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);

  oprnds_info.release ();
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
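/* A worked example (illustrative assumption, not from the original sources):
   suppose FIRST_STMT heads an interleaving chain of loads from a[0], a[1]
   and a[3], where GROUP_GAP of a[1] is 1 and GROUP_GAP of a[3] is 2 (the
   distance from the previous chain element).  Walking GROUP_NEXT_ELEMENT
   from FIRST_STMT and summing the gaps yields place 0 for a[0], 1 for a[1]
   and 3 for a[3]; a stmt belonging to a different chain yields -1.  */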
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
  gimple *next_stmt = first_stmt;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))

      if (next_stmt == stmt)

      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
      number_of_oprnds = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
          first_op_cond = true;
      commutative = commutative_tree_code (code);

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];

            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
            oprnd = gimple_op (stmt, map[i]);
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         same pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

      if (dt == vect_unknown_def_type)
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Unsupported pattern.\n");

      switch (gimple_code (def_stmt))
            if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "unsupported defining stmt:\n");

        oprnd_info->second_pattern = pattern;

          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);

          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
              /* Try swapping operands if we got a mismatch.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

      /* Check the types of the definitions.  */
        case vect_constant_def:
        case vect_external_def:

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);

          /* FORNOW:  Not supported.  */
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

  /* If there are already uses of this stmt in a SLP instance then
     we've committed to the operand order and can't swap it.  */
  if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
      if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: cannot swap operands of "
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

      tree cond = gimple_assign_rhs1 (stmt);
      enum tree_code code = TREE_CODE (cond);

          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (code));

          swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                             gimple_assign_rhs3_ptr (stmt));
          bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
          code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
          gcc_assert (code != ERROR_MARK);
          TREE_SET_CODE (cond, code);

      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                         gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
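/* Illustrative sketch (an assumed example, not part of the original comment):
   with a group of 2 scalar stmts whose narrowest type yields V4SI
   (TYPE_VECTOR_SUBPARTS == 4), basic-block SLP fails here because filling
   one vector would require unrolling the group, whereas loop SLP simply
   raises *MAX_NUNITS to 4.  */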
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, unsigned int *max_nunits)
      if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
      /* Fatal mismatch.  */

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
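/* An additional illustrative case (assumed example, not from the original
   text): for the group { x0 + y0, x1 - y1, x2 + y2, x3 - y3 } the second and
   fourth stmts mismatch the first stmt's PLUS_EXPR, but MINUS_EXPR is
   recorded as the alternate operation; both a vector add and a vector
   subtract are emitted later and blended with a lane-selecting permutation,
   provided can_vec_perm_p accepts that selection, and *TWO_OPERATORS is set
   to true.  */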
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          /* Fatal mismatch.  */

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
          if (dump_enabled_p ())
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          /* Fatal mismatch.  */

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
          /* Fatal mismatch.  */

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
              /* Fatal mismatch.  */

        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);

          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

          if (rhs_code == CALL_EXPR)
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                  if (dump_enabled_p ())
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
          if (REFERENCE_CLASS_P (lhs))
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                      if (dump_enabled_p ())
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                prev_first_load = first_load;
        } /* Grouped access.  */
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
              /* Not grouped load.  */
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              /* Fatal mismatch.  */

          if (rhs_code == COND_EXPR)
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);

              if (first_cond_code == cond_code)
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                  if (dump_enabled_p ())
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

  for (i = 0; i < group_size; ++i)

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)
          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
          sel.quick_push (elt);
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
                if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
      *two_operators = true;
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
  typedef vec<gimple *> value_type;
  typedef vec<gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }

bst_traits::hash (value_type x)
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));

bst_traits::equal (value_type existing, value_type candidate)
  if (existing.length () != candidate.length ())
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])

static hash_set <vec <gimple *>, bst_traits> *bst_fail;
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
  if (bst_fail->contains (stmts))
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
      x.create (stmts.length ());
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */
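/* A small illustrative sketch (assumed example, not taken from the original
   sources): for STMTS = { x0 = y0 + z0, x1 = y1 + z1 } the node built here
   gets two children, one for the defs { y0, y1 } and one for { z0, z1 };
   the recursion terminates when a child is a grouped load, a PHI, or is
   built from constants/externals.  */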
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
  else if (gimple_code (stmt) == GIMPLE_PHI)

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
      /* Else def types have to match.  */
        FOR_EACH_VEC_ELT (stmts, i, stmt)
            /* But for reduction chains only check on the first stmt.  */
            if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
            if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
      node = vect_create_new_slp_node (stmts);

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
          matches[(res == -1) ? 0 : i] = false;
      for (i = 0; i < group_size; ++i)
      vect_free_oprnd_info (oprnds_info);

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)

      if (++this_tree_size > max_tree_size)
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        max_tree_size)) != NULL)
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                   (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
                  || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
                if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            max_tree_size)) != NULL)
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
              for (j = 0; j < group_size; ++j)
                  gimple *stmt = stmts[j];
                    /* Avoid swapping operands twice.  */
                    if (gimple_plf (stmt, GF_PLF_1))
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                    gimple_set_plf (stmt, GF_PLF_1, true);
              /* Verify we swap all duplicates or none.  */
              for (j = 0; j < group_size; ++j)
                  gimple *stmt = stmts[j];
                  gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                       (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);

      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

vect_mark_slp_stmts_relevant (slp_tree node)
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
/* Rearrange the statements of NODE according to PERMUTATION.  */

vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
  vec<gimple *> tmp_stmts;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
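/* Illustrative example (an assumption for exposition): if every load node of
   a reduction SLP instance carries the same permutation, say { 2, 3, 0, 1 },
   the scalar stmts of all nodes are reordered according to that sequence and
   the load permutations can then be dropped, except where gaps still require
   keeping them.  */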
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
      if (!load->load_permutation.exists ())
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
      if (lidx >= group_size)
      if (bitmap_bit_p (load_index, lidx))
      bitmap_set_bit (load_index, lidx);
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */
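/* Illustrative example (assumed, not part of the original comment): in a
   basic block, a load node using only a[1] and a[2] out of an interleaving
   group a[0..3] forms a subchain with gap 1 between its elements, so its
   load permutation can be dropped, whereas a node reading a[3], a[0] needs a
   real permutation that must be validated below.  */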
vect_supported_load_permutation_p (slp_instance slp_instn)
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
          bool subchain_p = true;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            SLP_TREE_LOAD_PERMUTATION (node).release ();
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
              /* Verify the permutation can be generated.  */
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   "unsupported load permutation\n");

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
/* Find the last store in SLP INSTANCE.  */

vect_find_last_scalar_stmt_in_slp (slp_tree node)
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
        last = get_later_stmt (stmt, last);
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost)
  stmt_vec_info stmt_info;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);

          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              vect_transform_slp_perm_load (node, vNULL, NULL,
                                            ncopies_for_cost, instance, true,
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
              bitmap_clear (perm);
              for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
                bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
              ncopies_for_cost = 0;
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
                  if (i % nunits == 0)
                  if (bitmap_bit_p (perm, i))
              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                              + nunits - 1) / nunits);
              ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,

  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
                        (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
                        (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
         to be generated and thus the derived IVs.  */

      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-worked.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
      tree op = gimple_op (stmt, i);
      enum vect_def_type dt;
      if (!op || op == lhs)
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
/* Compute the cost for the SLP instance INSTANCE.  */

vect_analyze_slp_cost (slp_instance instance, void *data)
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
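  /* A worked example of the computation below (illustrative assumption):
     with GROUP_SIZE 6, a vectorization factor of 1 and NUNITS 4, the value
     is least_common_multiple (4, 6) / 4 = 3 vector stmts per group, while
     with GROUP_SIZE 2 and NUNITS 4 it is 1.  */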
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
          nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
                                           (TREE_TYPE (gimple_assign_rhs1 (stmt))));
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_body);

  prologue_cost_vec.release ();
  body_cost_vec.release ();
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */
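/* Illustrative example (assumed numbers): splitting a group of 6 stores with
   GROUP1_SIZE 4 leaves { s0..s3 } in the first group and makes s4 head a new
   group of size 2; the second group's GROUP_GAP becomes the original group's
   gap plus 4, and the first group's GROUP_GAP grows by 2, so that both groups
   together still span the original access range.  */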
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2)) =
    GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);
1943 /* Analyze an SLP instance starting from a group of grouped stores. Call
1944 vect_build_slp_tree to build a tree of packed stmts if possible.
1945 Return FALSE if it's impossible to SLP any stmt in the loop. */
1948 vect_analyze_slp_instance (vec_info
*vinfo
,
1949 gimple
*stmt
, unsigned max_tree_size
)
1951 slp_instance new_instance
;
1953 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1954 unsigned int unrolling_factor
= 1, nunits
;
1955 tree vectype
, scalar_type
= NULL_TREE
;
1958 unsigned int max_nunits
= 0;
1959 vec
<slp_tree
> loads
;
1960 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1961 vec
<gimple
*> scalar_stmts
;
1963 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1967 scalar_type
= TREE_TYPE (DR_REF (dr
));
1968 vectype
= get_vectype_for_scalar_type (scalar_type
);
1972 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1973 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1976 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1980 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1981 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1982 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1987 if (dump_enabled_p ())
1989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1990 "Build SLP failed: unsupported data-type ");
1991 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1992 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1997 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1999 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2000 scalar_stmts
.create (group_size
);
2002 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2004 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2007 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
2008 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2009 scalar_stmts
.safe_push (
2010 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2012 scalar_stmts
.safe_push (next
);
2013 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2015 /* Mark the first element of the reduction chain as reduction to properly
2016 transform the node. In the reduction analysis phase only the last
2017 element of the chain is marked as reduction. */
2018 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2019 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2023 /* Collect reduction statements. */
2024 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2025 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2026 scalar_stmts
.safe_push (next
);
2029 loads
.create (group_size
);
2031 /* Build the tree for the SLP instance. */
2032 bool *matches
= XALLOCAVEC (bool, group_size
);
2033 unsigned npermutes
= 0;
2034 bst_fail
= new hash_set
<vec
<gimple
*>, bst_traits
> ();
2035 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2036 &max_nunits
, &loads
, matches
, &npermutes
,
2037 NULL
, max_tree_size
);
2041 /* Calculate the unrolling factor based on the smallest type. */
2043 = least_common_multiple (max_nunits
, group_size
) / group_size
;
2045 if (unrolling_factor
!= 1
2046 && is_a
<bb_vec_info
> (vinfo
))
2049 if (max_nunits
> group_size
)
2051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2052 "Build SLP failed: store group "
2053 "size not a multiple of the vector size "
2054 "in basic block SLP\n");
2055 vect_free_slp_tree (node
);
2059 /* Fatal mismatch. */
2060 matches
[group_size
/max_nunits
* max_nunits
] = false;
2061 vect_free_slp_tree (node
);
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_LOADS (new_instance) = loads;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple *load, *first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place = vect_get_place_in_interleaving_chain
                                 (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted
              /* The load requires permutation when unrolling exposes
                 a gap either because the group is larger than the SLP
                 group-size or because there is a gap between the groups.  */
              && (unrolling_factor == 1
                  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
                      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }
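
      /* Illustrative note (not part of the original sources): for a load node
         whose scalar stmts read from one interleaving chain in the order
         { a[2*i+1], a[2*i] }, their positions in the chain are { 1, 0 }, so
         LOAD_PLACE differs from the element index J and the node gets the
         load permutation { 1, 0 }.  If the loads already appear in chain
         order the permutation vector is released again and nothing is
         recorded for the node.  */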
      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
                                    TDF_SLIM, stmt, 0);
                }
              vect_free_slp_instance (new_instance);
              return false;
            }
        }

      /* If the loads and stores can be handled with load/store-lanes
         instructions do not generate this SLP instance.  */
      if (is_a <loop_vec_info> (vinfo)
          && loads_permuted
          && dr && vect_store_lanes_supported (vectype, group_size))
        {
          slp_tree load_node;
          FOR_EACH_VEC_ELT (loads, i, load_node)
            {
              gimple *first_stmt = GROUP_FIRST_ELEMENT
                  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
              stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
              /* Use SLP for strided accesses (or if we
                 can't load-lanes).  */
              if (STMT_VINFO_STRIDED_P (stmt_vinfo)
                  || ! vect_load_lanes_supported
                         (STMT_VINFO_VECTYPE (stmt_vinfo),
                          GROUP_SIZE (stmt_vinfo)))
                break;
            }
          if (i == loads.length ())
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Built SLP cancelled: can use "
                                 "load/store-lanes\n");
              vect_free_slp_instance (new_instance);
              return false;
            }
        }

      vinfo->slp_instances.safe_push (new_instance);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Final SLP tree for instance:\n");
          vect_print_slp_tree (MSG_NOTE, vect_location, node);
        }

      return true;
    }
  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  scalar_stmts.release ();
  loads.release ();

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  if (is_a <bb_vec_info> (vinfo)
      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* We consider breaking the group only on VF boundaries from the existing
         start.  */
      for (i = 0; i < group_size; i++)
        if (!matches[i]) break;

      if (i >= nunits && i < group_size)
        {
          /* Split into two groups at the first vector boundary before i.  */
          gcc_assert ((nunits & (nunits - 1)) == 0);
          unsigned group1_size = i & ~(nunits - 1);

          gimple *rest = vect_split_slp_store_group (stmt, group1_size);
          bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
          /* If the first non-match was in the middle of a vector,
             skip the rest of that vector.  */
          if (group1_size < i)
            {
              i = group1_size + nunits;
              if (i < group_size)
                rest = vect_split_slp_store_group (rest, nunits);
            }
          if (i < group_size)
            res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
          return res;
        }
      /* Even though the first vector did not all match, we might be able to SLP
         (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
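
/* Illustrative note (not part of the original sources): assume nunits == 4
   and a store group of eight stmts where matches[] is true for elements
   0..5 only.  The first mismatch is at i == 6, so group1_size = 6 & ~3 = 4;
   the group is split after the first four stores and those are re-analyzed
   as their own instance, while the vector that contains the mismatch
   (elements 4..7) is skipped and not re-analyzed.  */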
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (! vect_analyze_slp_instance (vinfo, first_element,
                                             max_tree_size))
              {
                /* Dissolve reduction chain group.  */
                gimple *next, *stmt = first_element;
                while (stmt)
                  {
                    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
                    next = GROUP_NEXT_ELEMENT (vinfo);
                    GROUP_FIRST_ELEMENT (vinfo) = NULL;
                    GROUP_NEXT_ELEMENT (vinfo) = NULL;
                    stmt = next;
                  }
                STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
                  = vect_internal_def;
              }
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
                                   max_tree_size);
    }

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
         original stmt for immediate uses.  */
      if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
          && STMT_VINFO_RELATED_STMT (stmt_vinfo))
        stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
        def = gimple_phi_result (stmt);
      else
        def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
          {
            if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
              continue;
            use_vinfo = vinfo_for_stmt (use_stmt);
            if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
                && STMT_VINFO_RELATED_STMT (use_vinfo))
              use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
            if (!STMT_SLP_TYPE (use_vinfo)
                && (STMT_VINFO_RELEVANT (use_vinfo)
                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
                && !(gimple_code (use_stmt) == GIMPLE_PHI
                     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
                                     "def in non-SLP stmt: ");
                    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
                  }
                stype = hybrid;
              }
          }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}
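
/* Illustrative note (not part of the original sources): if a pure-SLP
   statement such as  x_1 = a[i] + b[i]  also feeds a non-SLP, loop-vectorized
   use like  sum_2 = sum_1 + x_1,  then x_1 has to be available both as a lane
   of an SLP vector and as an ordinary loop-vectorized def, so its statement
   is re-marked as hybrid here, and the marking is propagated down the SLP
   subtree that computes its operands.  */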
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  struct loop *loopp = (struct loop *) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
          && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
            }
          STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
        }
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
                          walk_stmt_info *)
{
  stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
          || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
            && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
                     "\n");

  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              walk_stmt_info wi;
              memset (&wi, 0, sizeof (wi));
              wi.info = LOOP_VINFO_LOOP (loop_vinfo);
              gimple_stmt_iterator gsi2
                = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
              walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
                                vect_detect_hybrid_slp_1, &wi);
              walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                               vect_detect_hybrid_slp_2,
                               vect_detect_hybrid_slp_1, &wi);
            }
        }
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
        vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
                                      i, pure_slp);
    }
}
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
                            gimple_stmt_iterator region_end_in)
  : vec_info (vec_info::bb, init_cost (NULL)),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
    }

  bb->aux = this;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);

      /* Reset region marker.  */
      gimple_set_uid (stmt, -1);
    }

  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
                                  slp_instance node_instance)
{
  bool dummy;
  int i, j;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
      return false;

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gcc_assert (stmt_info);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      gimple *sstmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
        STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
    }

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    {
      int vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
        vf = loop_vinfo->vectorization_factor;
      else
        vf = 1;
      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype);
    }
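
  /* Illustrative note (not part of the original sources): for a loop with
     vectorization factor VF = 4, an SLP group of GROUP_SIZE = 2 and a vector
     type with TYPE_VECTOR_SUBPARTS = 8, the node needs 4 * 2 / 8 = 1 vector
     statement; with VF = 16 it would need 4.  For a basic-block instance VF
     is 1 and the count is simply GROUP_SIZE divided by the vector width.  */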
  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
        = SLP_TREE_DEF_TYPE (child);
  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
        = vect_internal_def;

  return res;
}
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_operations ===\n");

  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (vinfo,
                                             SLP_INSTANCE_TREE (instance),
                                             instance))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "removing SLP instance operations starting from: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                            SLP_TREE_SCALAR_STMTS
                              (SLP_INSTANCE_TREE (instance))[0], 0);
          vect_free_slp_instance (instance);
          vinfo->slp_instances.ordered_remove (i);
        }
      else
        {
          /* Compute the costs of the SLP instance.  */
          vect_analyze_slp_cost (instance, vinfo->target_cost_data);
          i++;
        }
    }

  return !vinfo->slp_instances.is_empty ();
}
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      if ((*life)[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt)
                && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
                                             use_stmt)
                    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
              {
                (*life)[i] = true;
                BREAK_FROM_IMM_USE_STMT (use_iter);
              }
        }
      if ((*life)[i])
        continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
        continue;
      gimple_set_visited (stmt, true);

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
          subtree_life.safe_splice (*life);
          scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
          subtree_life.truncate (0);
        }
    }

  return scalar_cost;
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
                                              SLP_INSTANCE_TREE (instance),
                                              &life);
    }

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is considered profitable if the vector cost does not exceed
     the scalar cost.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
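
/* Illustrative note (not part of the original sources): if the instances in
   the region replace 8 scalar stmts of cost 1 each (scalar_cost = 8) by
   2 vector stmts of cost 1 plus one constant load in the prologue
   (vec_inside_cost = 2, vec_outside_cost = 1), then 2 + 1 <= 8 and the
   region is vectorized; had the vector costs summed to 9 the check above
   would reject it.  */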
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
                       gimple_stmt_iterator region_end,
                       vec<data_reference_p> datarefs, int n_stmts,
                       bool &fatal)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  int min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: too many instructions in "
                         "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new _bb_vec_info (region_begin, region_end);
  if (!bb_vinfo)
    return NULL;

  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");

      delete bb_vinfo;
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: no grouped stores in "
                         "basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Failed to SLP the basic block.\n");
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: failed to find SLP opportunities "
                           "in basic block.\n");
        }

      delete bb_vinfo;
      return NULL;
    }

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
          || ! vect_slp_analyze_instance_dependence (instance))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "removing SLP instance operations starting from: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                            SLP_TREE_SCALAR_STMTS
                              (SLP_INSTANCE_TREE (instance))[0], 0);
          vect_free_slp_instance (instance);
          BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
          continue;
        }

      /* Mark all the statements that we want to vectorize as pure SLP and
         relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");

      delete bb_vinfo;
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;
  bool any_vectorized = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  gsi = gsi_start_bb (bb);

  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = gimple_location (stmt);

          if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          vect_schedule_slp (bb_vinfo);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "basic block part vectorized\n");

          vectorized = true;
        }
      delete bb_vinfo;

      any_vectorized |= vectorized;

      vector_sizes &= ~current_vector_size;
      if (vectorized
          || vector_sizes == 0
          || current_vector_size == 0
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = 1 << floor_log2 (vector_sizes);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "***** Re-trying analysis with "
                             "vector size %d\n", current_vector_size);

          /* Start over with the same region.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}
/* Return true if the vector type of the boolean constant that is the OPNUM
   operand of statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
        op = gimple_assign_rhs1 (stmt);
      else
        op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        op = cond;
      else if (opnum)
        op = TREE_OPERAND (cond, 1);
      else
        op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */
  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  auto_vec<tree, 32> elts (nunits);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                case COND_EXPR:
                  {
                    tree cond = gimple_assign_rhs1 (stmt);
                    if (TREE_CODE (cond) == SSA_NAME)
                      op = gimple_op (stmt, op_num + 1);
                    else if (op_num == 0 || op_num == 1)
                      op = TREE_OPERAND (cond, op_num);
                    else if (op_num == 2)
                      op = gimple_assign_rhs2 (stmt);
                    else
                      op = gimple_assign_rhs3 (stmt);
                  }
                  break;

                case CALL_EXPR:
                  op = gimple_call_arg (stmt, op_num);
                  break;

                case LSHIFT_EXPR:
                case RSHIFT_EXPR:
                case LROTATE_EXPR:
                case RROTATE_EXPR:
                  op = gimple_op (stmt, op_num + 1);
                  /* Unlike the other binary operators, shifts/rotates have
                     the shift count being int, instead of the same type as
                     the lhs, so make sure the scalar is the right type if
                     we are dealing with vectors of
                     long long/long/short/char.  */
                  if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                    op = fold_convert (TREE_TYPE (vector_type), op);
                  break;

                default:
                  op = gimple_op (stmt, op_num + 1);
                  break;
                }
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p)
                vec_cst = build_vector (vector_type, elts);
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  gsi = gsi_for_stmt
                          (vect_find_last_scalar_stmt_in_slp (slp_node));
                  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
                }
              else
                init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
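
/* Illustrative note (not part of the original sources): for an SLP node of
   four adds  x[i] = a[i] + 3,  the constant operand gives GROUP_SIZE = 4
   scalars; with NUNITS = 4 and NUMBER_OF_VECTORS = 1 a single vector constant
   { 3, 3, 3, 3 } is built above.  If one lane used a different constant, say
   x[3] = a[3] + 5, the per-stmt operands collected in ELTS would instead
   produce { 3, 3, 3, 5 }.  */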
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
        vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors (), and not advance CHILD_INDEX in order
         to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
              tree first_def_op;

              if (gimple_code (first_def) == GIMPLE_PHI)
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd,
                                          gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  auto_vec_perm_indices mask (nunits);
  mask.quick_grow (nunits);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;
  for (int j = 0; j < vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            first_vec_index = vec_index;
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits)
            {
              if (! noop_p
                  && ! can_vec_perm_p (mode, false, &mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  gcc_assert (analyze_only);
                  return false;
                }

              if (! noop_p)
                ++*n_perms;

              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    {
                      auto_vec<tree, 32> mask_elts (nunits);
                      for (int l = 0; l < nunits; ++l)
                        mask_elts.quick_push (build_int_cst (mask_element_type,
                                                             mask[l]));
                      mask_vec = build_vector (mask_type, mask_elts);
                    }

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }
  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      auto_vec_perm_indices mask (group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask.quick_push (1);
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask.quick_push (0);
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
              auto_vec<tree, 32> melts (nunits);
              for (l = 0; l < nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
                  melts.quick_push (t);
                }
              tmask = build_vector (mvectype, melts);

              /* ???  Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
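
/* Illustrative note (not part of the original sources): for an addsub-style
   group
     a[0] = b[0] + c[0];  a[1] = b[1] - c[1];
     a[2] = b[2] + c[2];  a[3] = b[3] - c[3];
   the node is vectorized once as all-PLUS (v0) and once as all-MINUS (v1),
   the per-lane mask built above is { 0, 1, 0, 1 }, and the final
   VEC_PERM_EXPR selects lane l from v0 when mask[l] is 0 and from v1 when it
   is 1, yielding { b0+c0, b1-c1, b2+c2, b3-c3 }.  */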
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}