1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* NOTE(review): this chunk is a lossy extraction -- the embedded original
   line numbers (46, 49, 57, ...) skip, so the return type, braces and the
   declarations of I and CHILD are missing from view.  Only comments are
   added here; all visible tokens are unchanged.  */
49 vect_free_slp_tree (slp_tree node
)
/* Post-order: free every child subtree before releasing this node's own
   vectors, so no child is referenced after its memory is gone.  */
57 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
58 vect_free_slp_tree (child
);
/* Release the vec<>s owned by NODE itself.  */
60 SLP_TREE_CHILDREN (node
).release ();
61 SLP_TREE_SCALAR_STMTS (node
).release ();
62 SLP_TREE_VEC_STMTS (node
).release ();
63 SLP_TREE_LOAD_PERMUTATION (node
).release ();
69 /* Free the memory allocated for the SLP instance. */
/* NOTE(review): lossy extraction -- the return type, braces and any
   trailing statements (original lines 70-73, 76+) are not visible.  */
72 vect_free_slp_instance (slp_instance instance
)
/* Free the whole SLP tree first, then the instance's loads vector.  */
74 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
75 SLP_INSTANCE_LOADS (instance
).release ();
80 /* Create an SLP node for SCALAR_STMTS. */
/* NOTE(review): lossy extraction -- the return type, the declarations of
   NOPS and NODE, braces and the final return are missing from view.  */
83 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
/* The first scalar stmt determines the operand count for the node.  */
86 gimple
*stmt
= scalar_stmts
[0];
89 if (is_gimple_call (stmt
))
90 nops
= gimple_call_num_args (stmt
);
91 else if (is_gimple_assign (stmt
))
/* For an assignment, operands = total gimple ops minus the lhs.  */
93 nops
= gimple_num_ops (stmt
) - 1;
/* COND_EXPR handling -- the adjustment applied here (presumably to NOPS)
   is on a line missing from this view; TODO confirm against upstream.  */
94 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
/* Allocate and initialize the node's per-field vectors.  */
100 node
= XNEW (struct _slp_tree
);
101 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
102 SLP_TREE_VEC_STMTS (node
).create (0);
103 SLP_TREE_CHILDREN (node
).create (nops
);
104 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
105 SLP_TREE_TWO_OPERATORS (node
) = false;
111 /* This structure is used in creation of an SLP tree. Each instance
112 corresponds to the same operand in a group of scalar stmts in an SLP
/* NOTE(review): lossy extraction -- additional members used elsewhere in
   this file (first_op_type, first_pattern, second_pattern) and the
   closing "} *slp_oprnd_info;" are missing from this view.  */
114 typedef struct _slp_oprnd_info
116 /* Def-stmts for the operands. */
117 vec
<gimple
*> def_stmts
;
118 /* Information about the first statement, its vector def-type, type, the
119 operand itself in case it's constant, and an indication if it's a pattern
121 enum vect_def_type first_dt
;
128 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
/* NOTE(review): lossy extraction -- braces, the declaration of I and the
   final return of OPRNDS_INFO are missing from this view.  */
130 static vec
<slp_oprnd_info
>
131 vect_create_oprnd_info (int nops
, int group_size
)
134 slp_oprnd_info oprnd_info
;
135 vec
<slp_oprnd_info
> oprnds_info
;
137 oprnds_info
.create (nops
);
/* One slp_oprnd_info per operand position; each starts uninitialized
   with room for GROUP_SIZE def stmts.  */
138 for (i
= 0; i
< nops
; i
++)
140 oprnd_info
= XNEW (struct _slp_oprnd_info
);
141 oprnd_info
->def_stmts
.create (group_size
);
142 oprnd_info
->first_dt
= vect_uninitialized_def
;
143 oprnd_info
->first_op_type
= NULL_TREE
;
144 oprnd_info
->first_pattern
= false;
145 oprnd_info
->second_pattern
= false;
146 oprnds_info
.quick_push (oprnd_info
);
153 /* Free operands info. */
/* NOTE(review): lossy extraction -- return type, braces and the
   declaration of I are missing from this view.  */
156 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
159 slp_oprnd_info oprnd_info
;
/* Release each element's def-stmts vector and the element itself,
   then the container.  */
161 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
163 oprnd_info
->def_stmts
.release ();
164 XDELETE (oprnd_info
);
167 oprnds_info
.release ();
171 /* Find the place of the data-ref in STMT in the interleaving chain that starts
172 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
/* NOTE(review): lossy extraction -- the return type, braces, the
   declaration/initialization of RESULT, the loop header around the walk
   and both return statements are missing from this view.  */
175 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
177 gimple
*next_stmt
= first_stmt
;
/* STMT must belong to the chain headed by FIRST_STMT.  */
180 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
/* Walk the chain via GROUP_NEXT_ELEMENT, accumulating GROUP_GAP into
   RESULT until STMT is found.  */
185 if (next_stmt
== stmt
)
187 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
189 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
197 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
198 they are of a valid type and that they match the defs of the first stmt of
199 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
200 return -1, if the error could be corrected by swapping operands of the
201 operation return 1, if everything is ok return 0. */
/* NOTE(review): lossy extraction -- the embedded original line numbers
   skip throughout this function, so braces, several statements and all
   return statements are missing from view.  Comments only are added.  */
204 vect_get_and_check_slp_defs (vec_info
*vinfo
,
205 gimple
*stmt
, unsigned stmt_num
,
206 vec
<slp_oprnd_info
> *oprnds_info
)
209 unsigned int i
, number_of_oprnds
;
211 enum vect_def_type dt
= vect_uninitialized_def
;
212 bool pattern
= false;
213 slp_oprnd_info oprnd_info
;
214 int first_op_idx
= 1;
215 bool commutative
= false;
216 bool first_op_cond
= false;
/* FIRST/SECOND: is STMT the first/second statement of the SLP group?  */
217 bool first
= stmt_num
== 0;
218 bool second
= stmt_num
== 1;
/* Determine the number of rhs operands of STMT.  */
220 if (is_gimple_call (stmt
))
222 number_of_oprnds
= gimple_call_num_args (stmt
);
225 else if (is_gimple_assign (stmt
))
227 enum tree_code code
= gimple_assign_rhs_code (stmt
);
228 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
229 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
231 first_op_cond
= true;
236 commutative
= commutative_tree_code (code
);
/* Walk the operands; SWAPPED tracks whether a corrective operand swap
   has been requested for this stmt.  */
241 bool swapped
= false;
242 for (i
= 0; i
< number_of_oprnds
; i
++)
/* For a COND_EXPR the first two operands live inside the comparison
   tree of rhs1; the rest are fetched directly from the stmt.  */
247 if (i
== 0 || i
== 1)
248 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
),
251 oprnd
= gimple_op (stmt
, first_op_idx
+ i
- 1);
254 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
256 oprnd_info
= (*oprnds_info
)[i
];
/* Classify the def of this operand; bail out if it can't be analyzed.  */
258 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
260 if (dump_enabled_p ())
262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
263 "Build SLP failed: can't analyze def for ");
264 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
265 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
271 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
272 from the pattern. Check that all the stmts of the node are in the
274 if (def_stmt
&& gimple_bb (def_stmt
)
275 && vect_stmt_in_region_p (vinfo
, def_stmt
)
276 && vinfo_for_stmt (def_stmt
)
277 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
278 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
279 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
/* Pattern state must agree with the first stmt of the group (with a
   relaxation for reduction-chain heads).  */
282 if (!first
&& !oprnd_info
->first_pattern
283 /* Allow different pattern state for the defs of the
284 first stmt in reduction chains. */
285 && (oprnd_info
->first_dt
!= vect_reduction_def
286 || (!second
&& !oprnd_info
->second_pattern
)))
296 if (dump_enabled_p ())
298 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
299 "Build SLP failed: some of the stmts"
300 " are in a pattern, and others are not ");
301 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
302 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Replace the def by the pattern stmt and re-read its def type.  */
308 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
309 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
311 if (dt
== vect_unknown_def_type
)
313 if (dump_enabled_p ())
314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
315 "Unsupported pattern.\n");
/* Only certain defining stmt kinds are supported; the accepted cases
   of this switch are on lines missing from this view.  */
319 switch (gimple_code (def_stmt
))
326 if (dump_enabled_p ())
327 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
328 "unsupported defining stmt:\n");
334 oprnd_info
->second_pattern
= pattern
;
/* First stmt of the group: record the reference def-type, pattern
   state and operand type that later stmts must match.  */
338 oprnd_info
->first_dt
= dt
;
339 oprnd_info
->first_pattern
= pattern
;
340 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
344 /* Not first stmt of the group, check that the def-stmt/s match
345 the def-stmt/s of the first stmt. Allow different definition
346 types for reduction chains: the first stmt must be a
347 vect_reduction_def (a phi node), and the rest
348 vect_internal_def. */
349 if (((oprnd_info
->first_dt
!= dt
350 && !(oprnd_info
->first_dt
== vect_reduction_def
351 && dt
== vect_internal_def
)
352 && !((oprnd_info
->first_dt
== vect_external_def
353 || oprnd_info
->first_dt
== vect_constant_def
)
354 && (dt
== vect_external_def
355 || dt
== vect_constant_def
)))
356 || !types_compatible_p (oprnd_info
->first_op_type
,
359 /* Try swapping operands if we got a mismatch. */
368 if (dump_enabled_p ())
369 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
370 "Build SLP failed: different types\n");
376 /* Check the types of the definitions. */
/* Internal defs are recorded so a child SLP node can be built for
   them; constants/externals/reductions need no def-stmt push here
   (their handling lines are missing from this view).  */
379 case vect_constant_def
:
380 case vect_external_def
:
381 case vect_reduction_def
:
384 case vect_internal_def
:
385 oprnd_info
->def_stmts
.quick_push (def_stmt
);
389 /* FORNOW: Not supported. */
390 if (dump_enabled_p ())
392 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
393 "Build SLP failed: illegal type of def ");
394 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
395 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Operand swap was requested: for a COND_EXPR swap the comparison
   operands and invert the comparison code; otherwise swap rhs1/rhs2
   in the SSA operands directly.  */
407 tree cond
= gimple_assign_rhs1 (stmt
);
408 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
409 &TREE_OPERAND (cond
, 1));
410 TREE_SET_CODE (cond
, swap_tree_comparison (TREE_CODE (cond
)));
413 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
414 gimple_assign_rhs2_ptr (stmt
));
421 /* Verify if the scalar stmts STMTS are isomorphic, require data
422 permutation or are of unsupported types of operation. Return
423 true if they are, otherwise return false and indicate in *MATCHES
424 which stmts are not isomorphic to the first one. If MATCHES[0]
425 is false then this indicates the comparison could not be
426 carried out or the stmts will never be vectorized by SLP. */
/* NOTE(review): lossy extraction -- braces, the TWO_OPERATORS parameter
   line, several declarations (i, lhs, cond, dummy, optab, icode, ncopies,
   sel) and all return statements are on lines missing from this view.
   Comments only are added; visible tokens are unchanged.  */
429 vect_build_slp_tree_1 (vec_info
*vinfo
,
430 vec
<gimple
*> stmts
, unsigned int group_size
,
431 unsigned nops
, unsigned int *max_nunits
,
432 unsigned int vectorization_factor
, bool *matches
,
436 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
437 enum tree_code first_stmt_code
= ERROR_MARK
;
438 enum tree_code alt_stmt_code
= ERROR_MARK
;
439 enum tree_code rhs_code
= ERROR_MARK
;
440 enum tree_code first_cond_code
= ERROR_MARK
;
442 bool need_same_oprnds
= false;
443 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
446 machine_mode optab_op2_mode
;
447 machine_mode vec_mode
;
449 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
452 /* For every stmt in NODE find its def stmt/s. */
453 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
457 if (dump_enabled_p ())
459 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
460 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
461 dump_printf (MSG_NOTE
, "\n");
464 /* Fail to vectorize statements marked as unvectorizable. */
465 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
467 if (dump_enabled_p ())
469 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
470 "Build SLP failed: unvectorizable statement ");
471 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
472 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
474 /* Fatal mismatch. */
/* Every stmt must produce a value (assignment or call with lhs).  */
479 lhs
= gimple_get_lhs (stmt
);
480 if (lhs
== NULL_TREE
)
482 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
485 "Build SLP failed: not GIMPLE_ASSIGN nor "
487 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
488 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
490 /* Fatal mismatch. */
/* A COND_EXPR rhs must carry an embedded comparison.  */
495 if (is_gimple_assign (stmt
)
496 && gimple_assign_rhs_code (stmt
) == COND_EXPR
497 && (cond
= gimple_assign_rhs1 (stmt
))
498 && !COMPARISON_CLASS_P (cond
))
500 if (dump_enabled_p ())
502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
503 "Build SLP failed: condition is not "
505 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
506 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
508 /* Fatal mismatch. */
/* Determine the vector type from the smallest scalar type in STMT.  */
513 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
514 vectype
= get_vectype_for_scalar_type (scalar_type
);
517 if (dump_enabled_p ())
519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
520 "Build SLP failed: unsupported data-type ");
521 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
523 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
525 /* Fatal mismatch. */
530 /* If populating the vector type requires unrolling then fail
531 before adjusting *max_nunits for basic-block vectorization. */
532 if (is_a
<bb_vec_info
> (vinfo
)
533 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
535 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
536 "Build SLP failed: unrolling required "
537 "in basic block SLP\n");
538 /* Fatal mismatch. */
543 /* In case of multiple types we need to detect the smallest type. */
544 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
546 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
547 if (is_a
<bb_vec_info
> (vinfo
))
548 vectorization_factor
= *max_nunits
;
/* Calls: only plain, nothrow, non-internal calls are SLP-able.  */
551 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
553 rhs_code
= CALL_EXPR
;
554 if (gimple_call_internal_p (call_stmt
)
555 || gimple_call_tail_p (call_stmt
)
556 || gimple_call_noreturn_p (call_stmt
)
557 || !gimple_call_nothrow_p (call_stmt
)
558 || gimple_call_chain (call_stmt
))
560 if (dump_enabled_p ())
562 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
563 "Build SLP failed: unsupported call type ");
564 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
566 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
568 /* Fatal mismatch. */
574 rhs_code
= gimple_assign_rhs_code (stmt
);
576 /* Check the operation. */
/* First stmt: remember its code as the reference for the group.  */
579 first_stmt_code
= rhs_code
;
581 /* Shift arguments should be equal in all the packed stmts for a
582 vector shift with scalar shift operand. */
583 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
584 || rhs_code
== LROTATE_EXPR
585 || rhs_code
== RROTATE_EXPR
)
587 vec_mode
= TYPE_MODE (vectype
);
589 /* First see if we have a vector/vector shift. */
590 optab
= optab_for_tree_code (rhs_code
, vectype
,
594 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
596 /* No vector/vector shift, try for a vector/scalar shift. */
597 optab
= optab_for_tree_code (rhs_code
, vectype
,
602 if (dump_enabled_p ())
603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
604 "Build SLP failed: no optab.\n");
605 /* Fatal mismatch. */
609 icode
= (int) optab_handler (optab
, vec_mode
);
610 if (icode
== CODE_FOR_nothing
)
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
615 "op not supported by target.\n");
616 /* Fatal mismatch. */
/* A scalar (non-vector) second operand of the shift insn means all
   group members must use the very same shift amount.  */
620 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
621 if (!VECTOR_MODE_P (optab_op2_mode
))
623 need_same_oprnds
= true;
624 first_op1
= gimple_assign_rhs2 (stmt
);
628 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
630 need_same_oprnds
= true;
631 first_op1
= gimple_assign_rhs2 (stmt
);
/* Subsequent stmts: compare the operation against the first one.
   ALT_STMT_CODE records a single allowed alternate code (used for
   the mixed plus/minus "two operators" case below).  */
636 if (first_stmt_code
!= rhs_code
637 && alt_stmt_code
== ERROR_MARK
)
638 alt_stmt_code
= rhs_code
;
639 if (first_stmt_code
!= rhs_code
640 && (first_stmt_code
!= IMAGPART_EXPR
641 || rhs_code
!= REALPART_EXPR
)
642 && (first_stmt_code
!= REALPART_EXPR
643 || rhs_code
!= IMAGPART_EXPR
)
644 /* Handle mismatches in plus/minus by computing both
645 and merging the results. */
646 && !((first_stmt_code
== PLUS_EXPR
647 || first_stmt_code
== MINUS_EXPR
)
648 && (alt_stmt_code
== PLUS_EXPR
649 || alt_stmt_code
== MINUS_EXPR
)
650 && rhs_code
== alt_stmt_code
)
651 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
652 && (first_stmt_code
== ARRAY_REF
653 || first_stmt_code
== BIT_FIELD_REF
654 || first_stmt_code
== INDIRECT_REF
655 || first_stmt_code
== COMPONENT_REF
656 || first_stmt_code
== MEM_REF
)))
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
661 "Build SLP failed: different operation "
663 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
666 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
/* Enforce identical scalar shift operand when required above.  */
674 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
679 "Build SLP failed: different shift "
681 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
682 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Calls must match the first stmt in callee, arity and fntype.  */
688 if (rhs_code
== CALL_EXPR
)
690 gimple
*first_stmt
= stmts
[0];
691 if (gimple_call_num_args (stmt
) != nops
692 || !operand_equal_p (gimple_call_fn (first_stmt
),
693 gimple_call_fn (stmt
), 0)
694 || gimple_call_fntype (first_stmt
)
695 != gimple_call_fntype (stmt
))
697 if (dump_enabled_p ())
699 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
700 "Build SLP failed: different calls in ");
701 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
703 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
711 /* Grouped store or load. */
712 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
714 if (REFERENCE_CLASS_P (lhs
))
722 /* Check that the size of interleaved loads group is not
723 greater than the SLP group size. */
725 = vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
726 if (is_a
<loop_vec_info
> (vinfo
)
727 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
728 && ((GROUP_SIZE (vinfo_for_stmt (stmt
))
729 - GROUP_GAP (vinfo_for_stmt (stmt
)))
730 > ncopies
* group_size
))
732 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
735 "Build SLP failed: the number "
736 "of interleaved loads is greater than "
737 "the SLP group size ");
738 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
740 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
742 /* Fatal mismatch. */
747 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
750 /* Check that there are no loads from different interleaving
751 chains in the same node. */
752 if (prev_first_load
!= first_load
)
754 if (dump_enabled_p ())
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
758 "Build SLP failed: different "
759 "interleaving chains in one node ");
760 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
762 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
769 prev_first_load
= first_load
;
771 } /* Grouped access. */
/* Non-grouped memory references are not SLP-able.  */
774 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
776 /* Not grouped load. */
777 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
780 "Build SLP failed: not grouped load ");
781 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
782 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
785 /* FORNOW: Not grouped loads are not supported. */
786 /* Fatal mismatch. */
791 /* Not memory operation. */
792 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
793 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
794 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
795 && rhs_code
!= CALL_EXPR
)
797 if (dump_enabled_p ())
799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
800 "Build SLP failed: operation");
801 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
802 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
803 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
805 /* Fatal mismatch. */
/* All COND_EXPRs in the group must use the same comparison code.  */
810 if (rhs_code
== COND_EXPR
)
812 tree cond_expr
= gimple_assign_rhs1 (stmt
);
815 first_cond_code
= TREE_CODE (cond_expr
);
816 else if (first_cond_code
!= TREE_CODE (cond_expr
))
818 if (dump_enabled_p ())
820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
821 "Build SLP failed: different"
823 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
825 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
836 for (i
= 0; i
< group_size
; ++i
)
840 /* If we allowed a two-operation SLP node verify the target can cope
841 with the permute we are going to use. */
842 if (alt_stmt_code
!= ERROR_MARK
843 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
/* Build a blend mask (SEL) selecting, per lane, either the result of
   the first operation or of the alternate one, and ask the target
   whether that permute is supported.  */
846 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype
));
847 for (i
= 0; i
< TYPE_VECTOR_SUBPARTS (vectype
); ++i
)
850 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
851 sel
[i
] += TYPE_VECTOR_SUBPARTS (vectype
);
853 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
/* Permute unsupported: mark the alternate-code stmts as mismatching
   (the MATCHES update line is missing from this view).  */
855 for (i
= 0; i
< group_size
; ++i
)
856 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
859 if (dump_enabled_p ())
861 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
862 "Build SLP failed: different operation "
864 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
868 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
874 *two_operators
= true;
880 /* Recursively build an SLP tree starting from NODE.
881 Fail (and return a value not equal to zero) if def-stmts are not
882 isomorphic, require data permutation or are of unsupported types of
883 operation. Otherwise, return 0.
884 The value returned is the depth in the SLP tree where a mismatch
/* NOTE(review): lossy extraction -- braces, several declarations
   (stmt, child, j), switch case labels, the npermutes cut-off test and
   all return statements are on lines missing from this view.  Comments
   only are added; visible tokens are unchanged.  */
888 vect_build_slp_tree (vec_info
*vinfo
,
889 slp_tree
*node
, unsigned int group_size
,
890 unsigned int *max_nunits
,
891 vec
<slp_tree
> *loads
,
892 unsigned int vectorization_factor
,
893 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
894 unsigned max_tree_size
)
896 unsigned nops
, i
, this_tree_size
= 0;
/* Operand count comes from the node's first scalar stmt.  */
901 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
902 if (is_gimple_call (stmt
))
903 nops
= gimple_call_num_args (stmt
);
904 else if (is_gimple_assign (stmt
))
906 nops
= gimple_num_ops (stmt
) - 1;
907 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
/* Verify the node's stmts are isomorphic before recursing.  */
913 bool two_operators
= false;
914 if (!vect_build_slp_tree_1 (vinfo
,
915 SLP_TREE_SCALAR_STMTS (*node
), group_size
, nops
,
916 max_nunits
, vectorization_factor
, matches
,
919 SLP_TREE_TWO_OPERATORS (*node
) = two_operators
;
921 /* If the SLP node is a load, terminate the recursion. */
922 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
923 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
925 loads
->safe_push (*node
);
929 /* Get at the operands, verifying they are compatible. */
930 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
931 slp_oprnd_info oprnd_info
;
932 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node
), i
, stmt
)
/* -1 = fatal, 1 = retryable via operand swap, 0 = ok (case labels are
   missing from this view).  */
934 switch (vect_get_and_check_slp_defs (vinfo
, stmt
, i
, &oprnds_info
))
940 vect_free_oprnd_info (oprnds_info
);
947 for (i
= 0; i
< group_size
; ++i
)
950 vect_free_oprnd_info (oprnds_info
);
954 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
956 /* Create SLP_TREE nodes for the definition node/s. */
957 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
/* Snapshot state so a failed child build can be rolled back.  */
960 unsigned old_nloads
= loads
->length ();
961 unsigned old_max_nunits
= *max_nunits
;
/* Only internal defs get a child node built for them.  */
963 if (oprnd_info
->first_dt
!= vect_internal_def
)
966 if (++this_tree_size
> max_tree_size
)
968 vect_free_oprnd_info (oprnds_info
);
972 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
975 vect_free_oprnd_info (oprnds_info
);
979 if (vect_build_slp_tree (vinfo
, &child
,
980 group_size
, max_nunits
, loads
,
981 vectorization_factor
, matches
,
982 npermutes
, &this_tree_size
, max_tree_size
))
984 /* If we have all children of child built up from scalars then just
985 throw that away and build it up this node from scalars. */
986 if (!SLP_TREE_CHILDREN (child
).is_empty ())
991 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
992 if (grandchild
!= NULL
)
/* Roll back and replace the child by a NULL (scalar) operand.  */
997 *max_nunits
= old_max_nunits
;
998 loads
->truncate (old_nloads
);
999 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1000 vect_free_slp_tree (grandchild
);
1001 SLP_TREE_CHILDREN (child
).truncate (0);
1003 dump_printf_loc (MSG_NOTE
, vect_location
,
1004 "Building parent vector operands from "
1005 "scalars instead\n");
1006 oprnd_info
->def_stmts
= vNULL
;
1007 vect_free_slp_tree (child
);
1008 SLP_TREE_CHILDREN (*node
).quick_push (NULL
);
/* Child built successfully: ownership of def_stmts moved into it.  */
1013 oprnd_info
->def_stmts
= vNULL
;
1014 SLP_TREE_CHILDREN (*node
).quick_push (child
);
1018 /* If the SLP build failed fatally and we analyze a basic-block
1019 simply treat nodes we fail to build as externally defined
1020 (and thus build vectors from the scalar defs).
1021 The cost model will reject outright expensive cases.
1022 ??? This doesn't treat cases where permutation ultimatively
1023 fails (or we don't try permutation below). Ideally we'd
1024 even compute a permutation that will end up with the maximum
1026 if (is_a
<bb_vec_info
> (vinfo
)
1028 /* ??? Rejecting patterns this way doesn't work. We'd have to
1029 do extra work to cancel the pattern so the uses see the
1031 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1034 slp_tree grandchild
;
/* Roll back and treat this operand as externally defined.  */
1037 *max_nunits
= old_max_nunits
;
1038 loads
->truncate (old_nloads
);
1039 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1040 vect_free_slp_tree (grandchild
);
1041 SLP_TREE_CHILDREN (child
).truncate (0);
1043 dump_printf_loc (MSG_NOTE
, vect_location
,
1044 "Building vector operands from scalars\n");
1045 oprnd_info
->def_stmts
= vNULL
;
1046 vect_free_slp_tree (child
);
1047 SLP_TREE_CHILDREN (*node
).quick_push (NULL
);
1051 /* If the SLP build for operand zero failed and operand zero
1052 and one can be commutated try that for the scalar stmts
1053 that failed the match. */
1055 /* A first scalar stmt mismatch signals a fatal mismatch. */
1057 /* ??? For COND_EXPRs we can swap the comparison operands
1058 as well as the arms under some constraints. */
1060 && oprnds_info
[1]->first_dt
== vect_internal_def
1061 && is_gimple_assign (stmt
)
1062 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1063 && !SLP_TREE_TWO_OPERATORS (*node
)
1064 /* Do so only if the number of not successful permutes was nor more
1065 than a cut-ff as re-trying the recursive match on
1066 possibly each level of the tree would expose exponential
1071 slp_tree grandchild
;
/* Roll back before re-trying with swapped operands.  */
1074 *max_nunits
= old_max_nunits
;
1075 loads
->truncate (old_nloads
);
1076 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1077 vect_free_slp_tree (grandchild
);
1078 SLP_TREE_CHILDREN (child
).truncate (0);
1080 /* Swap mismatched definition stmts. */
1081 dump_printf_loc (MSG_NOTE
, vect_location
,
1082 "Re-trying with swapped operands of stmts ");
1083 for (j
= 0; j
< group_size
; ++j
)
1086 std::swap (oprnds_info
[0]->def_stmts
[j
],
1087 oprnds_info
[1]->def_stmts
[j
]);
1088 dump_printf (MSG_NOTE
, "%d ", j
);
1090 dump_printf (MSG_NOTE
, "\n");
1091 /* And try again with scratch 'matches' ... */
1092 bool *tem
= XALLOCAVEC (bool, group_size
);
1093 if (vect_build_slp_tree (vinfo
, &child
,
1094 group_size
, max_nunits
, loads
,
1095 vectorization_factor
,
1096 tem
, npermutes
, &this_tree_size
,
1099 /* ... so if successful we can apply the operand swapping
1100 to the GIMPLE IL. This is necessary because for example
1101 vect_get_slp_defs uses operand indexes and thus expects
1102 canonical operand order. */
1103 for (j
= 0; j
< group_size
; ++j
)
1106 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (*node
)[j
];
1107 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1108 gimple_assign_rhs2_ptr (stmt
));
1110 oprnd_info
->def_stmts
= vNULL
;
1111 SLP_TREE_CHILDREN (*node
).quick_push (child
);
/* Child build failed irrecoverably: free it and give up.  */
1118 oprnd_info
->def_stmts
= vNULL
;
1119 vect_free_slp_tree (child
);
1120 vect_free_oprnd_info (oprnds_info
);
1125 *tree_size
+= this_tree_size
;
1127 vect_free_oprnd_info (oprnds_info
);
1131 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
/* NOTE(review): lossy extraction -- the return type, braces, the NODE
   null-check and the declarations of I, STMT and CHILD are missing
   from this view.  */
1134 vect_print_slp_tree (int dump_kind
, slp_tree node
)
/* Print this node's scalar stmts, then recurse into the children.  */
1143 dump_printf (dump_kind
, "node ");
1144 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1146 dump_printf (dump_kind
, "\n\tstmt %d ", i
);
1147 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1149 dump_printf (dump_kind
, "\n");
1151 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1152 vect_print_slp_tree (dump_kind
, child
);
1156 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1157 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1158 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1159 stmts in NODE are to be marked. */
/* NOTE(review): lossy extraction -- the return type, braces, the NODE
   null-check and the declarations of I, STMT and CHILD are missing
   from this view.  */
1162 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
/* Mark the selected stmt(s) of this node, then recurse.  */
1171 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1172 if (j
< 0 || i
== j
)
1173 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1175 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1176 vect_mark_slp_stmts (child
, mark
, j
);
1180 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* NOTE(review): lossy extraction -- the return type, braces, the NODE
   null-check and the declarations of I, STMT and CHILD are missing
   from this view.  */
1183 vect_mark_slp_stmts_relevant (slp_tree node
)
1187 stmt_vec_info stmt_info
;
/* Each stmt must either be not-yet-relevant or already used in scope;
   after this it is vect_used_in_scope.  */
1193 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1195 stmt_info
= vinfo_for_stmt (stmt
);
1196 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1197 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1198 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1201 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1202 vect_mark_slp_stmts_relevant (child
);
1206 /* Rearrange the statements of NODE according to PERMUTATION. */
/* NOTE(review): lossy extraction -- the return type, braces and the
   declarations of I, STMT and CHILD are missing from this view.  */
1209 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1210 vec
<unsigned> permutation
)
1213 vec
<gimple
*> tmp_stmts
;
/* Permute children first, depth-first.  */
1217 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1218 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1220 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
/* Scatter the stmts into a temporary vector per PERMUTATION, then
   replace the node's stmt vector wholesale.  */
1221 tmp_stmts
.create (group_size
);
1222 tmp_stmts
.quick_grow_cleared (group_size
);
1224 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1225 tmp_stmts
[permutation
[i
]] = stmt
;
1227 SLP_TREE_SCALAR_STMTS (node
).release ();
1228 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1232 /* Attempt to reorder stmts in a reduction chain so that we don't
1233 require any load permutation. Return true if that was possible,
1234 otherwise return false. */
/* NOTE(review): lossy extraction -- the return type, braces, the
   declarations of I, J, LIDX and LOAD_INDEX, and the return statements
   (including early "return false" paths) are missing from this view.  */
1237 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1239 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1243 slp_tree node
, load
;
1245 /* Compare all the permutation sequences to the first one. We know
1246 that at least one load is permuted. */
1247 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1248 if (!node
->load_permutation
.exists ())
/* All load nodes must carry the identical permutation.  */
1250 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1252 if (!load
->load_permutation
.exists ())
1254 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1255 if (lidx
!= node
->load_permutation
[j
])
1259 /* Check that the loads in the first sequence are different and there
1260 are no gaps between them. */
1261 load_index
= sbitmap_alloc (group_size
);
1262 bitmap_clear (load_index
);
/* The permutation must be a bijection onto [0, group_size).  */
1263 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1265 if (bitmap_bit_p (load_index
, lidx
))
1267 sbitmap_free (load_index
);
1270 bitmap_set_bit (load_index
, lidx
);
1272 for (i
= 0; i
< group_size
; i
++)
1273 if (!bitmap_bit_p (load_index
, i
))
1275 sbitmap_free (load_index
);
1278 sbitmap_free (load_index
);
1280 /* This permutation is valid for reduction. Since the order of the
1281 statements in the nodes is not important unless they are memory
1282 accesses, we can rearrange the statements in all the nodes
1283 according to the order of the loads. */
1284 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1285 node
->load_permutation
);
1287 /* We are done, no actual permutations need to be generated. */
1288 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1289 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1293 /* Check if the required load permutations in the SLP instance
1294 SLP_INSTN are supported. */
/* NOTE(review): this extraction looks lossy -- lines containing only
   punctuation/keywords ('{', '}', 'else', 'return ...;') appear to be
   missing.  Verify control flow against upstream tree-vect-slp.c.  */
/* Returns whether every load permutation recorded in SLP_INSTN's load
   nodes can be handled (reduction rearrangement, BB subchains, or an
   explicit permutation via vect_transform_slp_perm_load).  */
1297 vect_supported_load_permutation_p (slp_instance slp_instn
)
1299 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1300 unsigned int i
, j
, k
, next
;
1302 gimple
*stmt
, *load
, *next_load
, *first_load
;
1303 struct data_reference
*dr
;
/* Dump the load permutation of every load node for debugging.  */
1305 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1308 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1309 if (node
->load_permutation
.exists ())
1310 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1311 dump_printf (MSG_NOTE
, "%d ", next
);
1313 for (k
= 0; k
< group_size
; ++k
)
1314 dump_printf (MSG_NOTE
, "%d ", k
);
1315 dump_printf (MSG_NOTE
, "\n");
1318 /* In case of reduction every load permutation is allowed, since the order
1319 of the reduction statements is not important (as opposed to the case of
1320 grouped stores). The only condition we need to check is that all the
1321 load nodes are of the same size and have the same permutation (and then
1322 rearrange all the nodes of the SLP instance according to this
1325 /* Check that all the load nodes are of the same size. */
1326 /* ??? Can't we assert this? */
1327 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1328 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1331 node
= SLP_INSTANCE_TREE (slp_instn
);
1332 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1334 /* Reduction (there are no data-refs in the root).
1335 In reduction chain the order of the loads is not important. */
1336 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1337 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1339 if (vect_attempt_slp_rearrange_stmts (slp_instn
))
1342 /* Fallthru to general load permutation handling. */
1345 /* In basic block vectorization we allow any subchain of an interleaving
1347 FORNOW: not supported in loop SLP because of realignment compications. */
1348 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1350 /* Check whether the loads in an instance form a subchain and thus
1351 no permutation is necessary. */
1352 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1354 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1356 bool subchain_p
= true;
1358 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1361 && (next_load
!= load
1362 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1367 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
/* A contiguous subchain needs no runtime permutation -- drop it.  */
1370 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1373 /* Verify the permutation can be generated. */
1375 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1376 1, slp_instn
, true))
1378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1380 "unsupported load permutation\n");
1386 /* Check that the alignment of the first load in every subchain, i.e.,
1387 the first statement in every load node, is supported.
1388 ??? This belongs in alignment checking. */
1389 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1391 first_load
= SLP_TREE_SCALAR_STMTS (node
)[0];
1392 if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1394 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1395 if (vect_supportable_dr_alignment (dr
, false)
1396 == dr_unaligned_unsupported
)
1398 if (dump_enabled_p ())
1400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1402 "unsupported unaligned load ");
1403 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1405 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1415 /* For loop vectorization verify we can generate the permutation. */
1416 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1417 if (node
->load_permutation
.exists ()
1418 && !vect_transform_slp_perm_load
1420 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true))
1427 /* Find the last store in SLP INSTANCE. */
/* NOTE(review): extraction looks lossy here (brace/return-only lines
   missing); verify against upstream tree-vect-slp.c.  */
/* Walks all scalar stmts of NODE and keeps the later of the current
   candidate and each stmt (via get_later_stmt); for pattern stmts the
   related original stmt is used for the comparison.  */
1430 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1432 gimple
*last
= NULL
, *stmt
;
1434 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1436 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1437 if (is_pattern_stmt_p (stmt_vinfo
))
1438 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1440 last
= get_later_stmt (stmt
, last
);
1446 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify control flow against upstream tree-vect-slp.c.  */
/* Recurses over NODE's children, then adds store/load costs for grouped
   accesses (plus vec_perm costs for permuted loads), a vector_stmt cost
   otherwise (doubled plus a vec_perm for two-operator nodes), and
   prologue costs for constant/external operands.  Costs are appended to
   PROLOGUE_COST_VEC / BODY_COST_VEC, scaled by NCOPIES_FOR_COST.  */
1449 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1450 stmt_vector_for_cost
*prologue_cost_vec
,
1451 stmt_vector_for_cost
*body_cost_vec
,
1452 unsigned ncopies_for_cost
)
1457 stmt_vec_info stmt_info
;
1459 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1461 /* Recurse down the SLP tree. */
1462 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1464 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1465 body_cost_vec
, ncopies_for_cost
);
1467 /* Look at the first scalar stmt to determine the cost. */
1468 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1469 stmt_info
= vinfo_for_stmt (stmt
);
1470 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1472 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1473 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
1474 vect_uninitialized_def
,
1475 node
, prologue_cost_vec
, body_cost_vec
);
1479 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1480 vect_model_load_cost (stmt_info
, ncopies_for_cost
, false,
1481 node
, prologue_cost_vec
, body_cost_vec
);
1482 /* If the load is permuted record the cost for the permutation.
1483 ??? Loads from multiple chains are let through here only
1484 for a single special case involving complex numbers where
1485 in the end no permutation is necessary. */
1486 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, s
)
1487 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s
))
1488 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
))
1489 && vect_get_place_in_interleaving_chain
1490 (s
, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
)) != i
)
1492 record_stmt_cost (body_cost_vec
, group_size
, vec_perm
,
1493 stmt_info
, 0, vect_body
);
1500 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1501 stmt_info
, 0, vect_body
);
/* Two-operator nodes need a second vector stmt plus a blend/permute.  */
1502 if (SLP_TREE_TWO_OPERATORS (node
))
1504 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1505 stmt_info
, 0, vect_body
);
1506 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1507 stmt_info
, 0, vect_body
);
1511 /* Scan operands and account for prologue cost of constants/externals.
1512 ??? This over-estimates cost for multiple uses and should be
1514 lhs
= gimple_get_lhs (stmt
);
1515 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1517 tree op
= gimple_op (stmt
, i
);
1519 enum vect_def_type dt
;
1520 if (!op
|| op
== lhs
)
1522 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1524 /* Without looking at the actual initializer a vector of
1525 constants can be implemented as load from the constant pool.
1526 ??? We need to pass down stmt_info for a vector type
1527 even if it points to the wrong stmt. */
1528 if (dt
== vect_constant_def
)
1529 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1530 stmt_info
, 0, vect_prologue
);
1531 else if (dt
== vect_external_def
)
1532 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1533 stmt_info
, 0, vect_prologue
);
1538 /* Compute the cost for the SLP instance INSTANCE. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Computes NCOPIES_FOR_COST from the unrolling need, collects costs via
   vect_analyze_slp_cost_1, then feeds both cost vectors into the target
   cost model DATA through add_stmt_cost.  */
1541 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1543 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1544 unsigned ncopies_for_cost
;
1545 stmt_info_for_cost
*si
;
1548 if (dump_enabled_p ())
1549 dump_printf_loc (MSG_NOTE
, vect_location
,
1550 "=== vect_analyze_slp_cost ===\n");
1552 /* Calculate the number of vector stmts to create based on the unrolling
1553 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1554 GROUP_SIZE / NUNITS otherwise. */
1555 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1556 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1557 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1558 /* Adjust the group_size by the vectorization factor which is always one
1559 for basic-block vectorization. */
1560 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1561 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1562 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1563 /* For reductions look at a reduction operand in case the reduction
1564 operation is widening like DOT_PROD or SAD. */
1565 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1567 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1568 switch (gimple_assign_rhs_code (stmt
))
1572 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1573 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1578 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1580 prologue_cost_vec
.create (10);
1581 body_cost_vec
.create (10);
1582 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1583 &prologue_cost_vec
, &body_cost_vec
,
1586 /* Record the prologue costs, which were delayed until we were
1587 sure that SLP was successful. */
1588 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1590 struct _stmt_vec_info
*stmt_info
1591 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1592 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1593 si
->misalign
, vect_prologue
);
1596 /* Record the instance's instructions in the target cost model. */
1597 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1599 struct _stmt_vec_info
*stmt_info
1600 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1601 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1602 si
->misalign
, vect_body
);
1605 prologue_cost_vec
.release ();
1606 body_cost_vec
.release ();
1609 /* Analyze an SLP instance starting from a group of grouped stores. Call
1610 vect_build_slp_tree to build a tree of packed stmts if possible.
1611 Return FALSE if it's impossible to SLP any stmt in the loop. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify control flow against upstream tree-vect-slp.c.  */
/* STMT is either the first stmt of a store group, the first stmt of a
   reduction chain, or a reduction root; MAX_TREE_SIZE bounds the SLP
   tree build.  On success the new instance is pushed onto
   VINFO->slp_instances.  */
1614 vect_analyze_slp_instance (vec_info
*vinfo
,
1615 gimple
*stmt
, unsigned max_tree_size
)
1617 slp_instance new_instance
;
1619 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1620 unsigned int unrolling_factor
= 1, nunits
;
1621 tree vectype
, scalar_type
= NULL_TREE
;
1623 unsigned int vectorization_factor
= 0;
1625 unsigned int max_nunits
= 0;
1626 vec
<slp_tree
> loads
;
1627 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1628 vec
<gimple
*> scalar_stmts
;
/* Determine vectype/group_size from the kind of root (store group,
   reduction chain, or reduction list).  */
1630 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1634 scalar_type
= TREE_TYPE (DR_REF (dr
));
1635 vectype
= get_vectype_for_scalar_type (scalar_type
);
1639 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1640 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1643 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1647 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1648 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1649 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1654 if (dump_enabled_p ())
1656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1657 "Build SLP failed: unsupported data-type ");
1658 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1659 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1665 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1666 if (is_a
<loop_vec_info
> (vinfo
))
1667 vectorization_factor
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
1669 vectorization_factor
= nunits
;
1671 /* Calculate the unrolling factor. */
1672 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1673 if (unrolling_factor
!= 1 && is_a
<bb_vec_info
> (vinfo
))
1675 if (dump_enabled_p ())
1676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1677 "Build SLP failed: unrolling required in basic"
1683 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1684 scalar_stmts
.create (group_size
);
1686 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1688 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1691 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1692 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1693 scalar_stmts
.safe_push (
1694 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1696 scalar_stmts
.safe_push (next
);
1697 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1699 /* Mark the first element of the reduction chain as reduction to properly
1700 transform the node. In the reduction analysis phase only the last
1701 element of the chain is marked as reduction. */
1702 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1703 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1707 /* Collect reduction statements. */
1708 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1709 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1710 scalar_stmts
.safe_push (next
);
1713 node
= vect_create_new_slp_node (scalar_stmts
);
1715 loads
.create (group_size
);
1717 /* Build the tree for the SLP instance. */
1718 bool *matches
= XALLOCAVEC (bool, group_size
);
1719 unsigned npermutes
= 0;
1720 if (vect_build_slp_tree (vinfo
, &node
, group_size
,
1721 &max_nunits
, &loads
,
1722 vectorization_factor
, matches
, &npermutes
, NULL
,
1725 /* Calculate the unrolling factor based on the smallest type. */
1726 if (max_nunits
> nunits
)
1727 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1730 if (unrolling_factor
!= 1 && is_a
<bb_vec_info
> (vinfo
))
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1734 "Build SLP failed: unrolling required in basic"
1736 vect_free_slp_tree (node
);
1741 /* Create a new SLP instance. */
1742 new_instance
= XNEW (struct _slp_instance
);
1743 SLP_INSTANCE_TREE (new_instance
) = node
;
1744 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1745 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1746 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1748 /* Compute the load permutation. */
1750 bool loads_permuted
= false;
1751 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1753 vec
<unsigned> load_permutation
;
1755 gimple
*load
, *first_stmt
;
1756 bool this_load_permuted
= false;
1757 load_permutation
.create (group_size
);
1758 first_stmt
= GROUP_FIRST_ELEMENT
1759 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1760 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1763 = vect_get_place_in_interleaving_chain (load
, first_stmt
);
1764 gcc_assert (load_place
!= -1);
1765 if (load_place
!= j
)
1766 this_load_permuted
= true;
1767 load_permutation
.safe_push (load_place
);
1769 if (!this_load_permuted
1770 /* The load requires permutation when unrolling exposes
1771 a gap either because the group is larger than the SLP
1772 group-size or because there is a gap between the groups. */
1773 && (unrolling_factor
== 1
1774 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1775 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
1777 load_permutation
.release ();
1780 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1781 loads_permuted
= true;
1786 if (!vect_supported_load_permutation_p (new_instance
))
1788 if (dump_enabled_p ())
1790 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1791 "Build SLP failed: unsupported load "
1793 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1794 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1796 vect_free_slp_instance (new_instance
);
1801 vinfo
->slp_instances
.safe_push (new_instance
);
1803 if (dump_enabled_p ())
1804 vect_print_slp_tree (MSG_NOTE
, node
);
1809 /* Failed to SLP. */
1810 /* Free the allocated memory. */
1811 vect_free_slp_tree (node
);
1818 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1819 trees of packed scalar stmts if SLP is possible. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Tries SLP instances from grouped stores, then (for loops) from
   reduction chains and from the list of reductions.  */
1822 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
1825 gimple
*first_element
;
1828 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
1831 /* Find SLP sequences starting from groups of grouped stores. */
1832 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
1833 if (vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
))
1836 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
1838 if (loop_vinfo
->reduction_chains
.length () > 0)
1840 /* Find SLP sequences starting from reduction chains. */
1841 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
1842 if (vect_analyze_slp_instance (vinfo
, first_element
,
1848 /* Don't try to vectorize SLP reductions if reduction chain was
1853 /* Find SLP sequences starting from groups of reductions. */
1854 if (loop_vinfo
->reductions
.length () > 1
1855 && vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
1864 /* For each possible SLP instance decide whether to SLP it and calculate overall
1865 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1866 least one instance. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
1869 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1871 unsigned int i
, unrolling_factor
= 1;
1872 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1873 slp_instance instance
;
1874 int decided_to_slp
= 0;
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
1880 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1882 /* FORNOW: SLP if you can. */
/* Track the maximum unrolling factor over all accepted instances.  */
1883 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1884 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1886 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1887 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1888 loop-based vectorization. Such stmts will be marked as HYBRID. */
1889 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1893 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1895 if (decided_to_slp
&& dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE
, vect_location
,
1897 "Decided to SLP %d instances. Unrolling factor %d\n",
1898 decided_to_slp
, unrolling_factor
);
1900 return (decided_to_slp
> 0);
1904 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1905 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify control flow against upstream tree-vect-slp.c.  */
/* I is the lane to inspect in each node; STYPE propagates 'hybrid'
   down the tree once a use outside SLP is found.  */
1908 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
1910 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
1911 imm_use_iterator imm_iter
;
1913 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
1915 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1916 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1919 /* Propagate hybrid down the SLP tree. */
1920 if (stype
== hybrid
)
1922 else if (HYBRID_SLP_STMT (stmt_vinfo
))
1926 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
1927 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
1928 /* We always get the pattern stmt here, but for immediate
1929 uses we have to use the LHS of the original stmt. */
1930 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
1931 if (STMT_VINFO_RELATED_STMT (stmt_vinfo
))
1932 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
1933 if (TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1934 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1936 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1938 use_vinfo
= vinfo_for_stmt (use_stmt
);
1939 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
1940 && STMT_VINFO_RELATED_STMT (use_vinfo
))
1941 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
1942 if (!STMT_SLP_TYPE (use_vinfo
)
1943 && (STMT_VINFO_RELEVANT (use_vinfo
)
1944 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
1945 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1946 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
1948 if (dump_enabled_p ())
1950 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
1951 "def in non-SLP stmt: ");
1952 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
1960 && !HYBRID_SLP_STMT (stmt_vinfo
))
1962 if (dump_enabled_p ())
1964 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
1965 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
1967 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
1970 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
1972 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
1975 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Tree-walk callback: for an SSA operand defined by a pure-SLP stmt
   inside the loop (passed via WI->info), mark that def stmt hybrid.  */
1978 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
1980 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
1981 struct loop
*loopp
= (struct loop
*)wi
->info
;
1986 if (TREE_CODE (*tp
) == SSA_NAME
1987 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
1989 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
1990 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
1991 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
1993 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
1996 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1998 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
/* Stmt-walk callback companion to vect_detect_hybrid_slp_1: skip (do not
   descend into) stmts already part of an SLP instance.  NOTE(review):
   extraction looks lossy here; verify against upstream tree-vect-slp.c.  */
2006 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2009 /* If the stmt is in a SLP instance then this isn't a reason
2010 to mark use definitions in other SLP instances as hybrid. */
2011 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi
))) != loop_vect
)
2016 /* Find stmts that must be both vectorized and SLPed. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* First walks all pattern stmts in the loop body with the _1/_2 walkers
   (immediate uses inside patterns are not recorded), then walks every
   SLP instance tree lane by lane with vect_detect_hybrid_slp_stmts.  */
2019 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2022 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2023 slp_instance instance
;
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2029 /* First walk all pattern stmt in the loop and mark defs of uses as
2030 hybrid because immediate uses in them are not recorded. */
2031 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2033 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2034 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2037 gimple
*stmt
= gsi_stmt (gsi
);
2038 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2039 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2042 memset (&wi
, 0, sizeof (wi
));
2043 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2044 gimple_stmt_iterator gsi2
2045 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2046 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2047 vect_detect_hybrid_slp_1
, &wi
);
2048 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2049 vect_detect_hybrid_slp_2
,
2050 vect_detect_hybrid_slp_1
, &wi
);
2055 /* Then walk the SLP instance trees marking stmts with uses in
2056 non-SLP stmts as hybrid, also propagating hybrid down the
2057 SLP tree, collecting the above info on-the-fly. */
2058 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2060 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2061 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2067 /* Create and initialize a new bb_vec_info struct for BB, as well as
2068 stmt_vec_info structs for all the stmts in it. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Allocates the bb_vec_info with xcalloc, records the [REGION_BEGIN,
   REGION_END) iterators, gives every stmt in the region uid 0 and a
   fresh stmt_vec_info, and initializes the instance/cost containers.  */
2071 new_bb_vec_info (gimple_stmt_iterator region_begin
,
2072 gimple_stmt_iterator region_end
)
2074 basic_block bb
= gsi_bb (region_begin
);
2075 bb_vec_info res
= NULL
;
2076 gimple_stmt_iterator gsi
;
2078 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
2079 res
->kind
= vec_info::bb
;
2080 BB_VINFO_BB (res
) = bb
;
2081 res
->region_begin
= region_begin
;
2082 res
->region_end
= region_end
;
2084 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2087 gimple
*stmt
= gsi_stmt (gsi
);
2088 gimple_set_uid (stmt
, 0);
2089 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, res
));
2092 BB_VINFO_GROUPED_STORES (res
).create (10);
2093 BB_VINFO_SLP_INSTANCES (res
).create (2);
2094 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
2101 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2102 stmts in the basic block. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Mirror of new_bb_vec_info: frees per-stmt vinfo, resets uids to -1,
   releases datarefs/ddrs, SLP instances, and the target cost data.  */
2105 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
2107 vec
<slp_instance
> slp_instances
;
2108 slp_instance instance
;
2110 gimple_stmt_iterator si
;
2116 bb
= BB_VINFO_BB (bb_vinfo
);
2118 for (si
= bb_vinfo
->region_begin
;
2119 gsi_stmt (si
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&si
))
2121 gimple
*stmt
= gsi_stmt (si
);
2122 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2125 /* Free stmt_vec_info. */
2126 free_stmt_vec_info (stmt
);
2128 /* Reset region marker. */
2129 gimple_set_uid (stmt
, -1);
2132 vect_destroy_datarefs (bb_vinfo
);
2133 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
2134 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
2135 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2136 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2137 vect_free_slp_instance (instance
);
2138 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
2139 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
2145 /* Analyze statements contained in SLP tree node after recursively analyzing
2146 the subtree. Return TRUE if the operations are supported. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
2149 vect_slp_analyze_node_operations (slp_tree node
)
/* Children first, so a failing subtree rejects the whole node.  */
2159 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2160 if (!vect_slp_analyze_node_operations (child
))
2163 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2165 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2166 gcc_assert (stmt_info
);
2167 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2169 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
2177 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2178 operations are supported. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Removes instances whose tree fails vect_slp_analyze_node_operations
   (note the loop does not advance i after ordered_remove); surviving
   instances have their costs recorded into DATA.  */
2181 vect_slp_analyze_operations (vec
<slp_instance
> slp_instances
, void *data
)
2183 slp_instance instance
;
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_NOTE
, vect_location
,
2188 "=== vect_slp_analyze_operations ===\n");
2190 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
2192 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance
)))
2194 dump_printf_loc (MSG_NOTE
, vect_location
,
2195 "removing SLP instance operations starting from: ");
2196 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2197 SLP_TREE_SCALAR_STMTS
2198 (SLP_INSTANCE_TREE (instance
))[0], 0);
2199 vect_free_slp_instance (instance
);
2200 slp_instances
.ordered_remove (i
);
2204 /* Compute the costs of the SLP instance. */
2205 vect_analyze_slp_cost (instance
, data
);
2210 if (!slp_instances
.length ())
2217 /* Compute the scalar cost of the SLP node NODE and its children
2218 and return it. Do not account defs that are marked in LIFE and
2219 update LIFE according to uses of NODE. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
2222 vect_bb_slp_scalar_cost (basic_block bb
,
2223 slp_tree node
, vec
<bool, va_heap
> *life
)
2225 unsigned scalar_cost
= 0;
2230 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2233 ssa_op_iter op_iter
;
2234 def_operand_p def_p
;
2235 stmt_vec_info stmt_info
;
2240 /* If there is a non-vectorized use of the defs then the scalar
2241 stmt is kept live in which case we do not account it or any
2242 required defs in the SLP children in the scalar cost. This
2243 way we make the vectorization more costly when compared to
2245 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2247 imm_use_iterator use_iter
;
2249 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2250 if (!is_gimple_debug (use_stmt
)
2251 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2253 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt
))))
2256 BREAK_FROM_IMM_USE_STMT (use_iter
);
/* Classify the scalar stmt as load, store or plain stmt for costing.  */
2262 stmt_info
= vinfo_for_stmt (stmt
);
2263 if (STMT_VINFO_DATA_REF (stmt_info
))
2265 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2266 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2268 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2271 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2273 scalar_cost
+= stmt_cost
;
2276 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2278 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, life
);
2283 /* Check if vectorization of the basic block is profitable. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Compares scalar cost (vect_bb_slp_scalar_cost over every instance)
   against the target model's vector inside+outside cost; the visible
   comparison rejects when vector cost >= scalar cost.  */
2286 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2288 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2289 slp_instance instance
;
2291 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2292 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2294 /* Calculate scalar cost. */
2295 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2297 auto_vec
<bool, 20> life
;
2298 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2299 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2300 SLP_INSTANCE_TREE (instance
),
2304 /* Complete the target-specific cost calculation. */
2305 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2306 &vec_inside_cost
, &vec_epilogue_cost
);
2308 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2310 if (dump_enabled_p ())
2312 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2313 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2315 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2316 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2317 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2320 /* Vectorization is profitable if its cost is less than the cost of scalar
2322 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
2328 /* Check if the basic block can be vectorized. */
/* NOTE(review): extraction looks lossy (brace/keyword-only lines missing);
   verify against upstream tree-vect-slp.c.  */
/* Pipeline over the region [REGION_BEGIN, REGION_END): size gate,
   new_bb_vec_info, data-ref analysis, access analysis, pattern
   recognition, alignment, SLP build, dependence and alignment
   verification, operation analysis, cost model.  Each failing stage
   destroys the bb_vinfo (and dumps the reason).  */
2331 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2332 gimple_stmt_iterator region_end
,
2333 vec
<data_reference_p
> datarefs
, int n_stmts
)
2335 bb_vec_info bb_vinfo
;
2336 vec
<slp_instance
> slp_instances
;
2337 slp_instance instance
;
2341 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2343 if (dump_enabled_p ())
2344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2345 "not vectorized: too many instructions in "
2347 free_data_refs (datarefs
);
2351 bb_vinfo
= new_bb_vec_info (region_begin
, region_end
);
2355 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2357 /* Analyze the data references. */
2359 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2363 "not vectorized: unhandled data-ref in basic "
2366 destroy_bb_vec_info (bb_vinfo
);
2370 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2372 if (dump_enabled_p ())
2373 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2374 "not vectorized: not enough data-refs in "
2377 destroy_bb_vec_info (bb_vinfo
);
2381 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2383 if (dump_enabled_p ())
2384 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2385 "not vectorized: unhandled data access in "
2388 destroy_bb_vec_info (bb_vinfo
);
2392 vect_pattern_recog (bb_vinfo
);
2394 if (!vect_analyze_data_refs_alignment (bb_vinfo
))
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2398 "not vectorized: bad data alignment in basic "
2401 destroy_bb_vec_info (bb_vinfo
);
2405 /* Check the SLP opportunities in the basic block, analyze and build SLP
2407 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2409 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2412 "Failed to SLP the basic block.\n");
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2414 "not vectorized: failed to find SLP opportunities "
2415 "in basic block.\n");
2418 destroy_bb_vec_info (bb_vinfo
);
2422 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2424 /* Mark all the statements that we want to vectorize as pure SLP and
2426 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2428 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2429 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2432 /* Mark all the statements that we do not want to vectorize. */
2433 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2434 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2436 stmt_vec_info vinfo
= vinfo_for_stmt (gsi_stmt (gsi
));
2437 if (STMT_SLP_TYPE (vinfo
) != pure_slp
)
2438 STMT_VINFO_VECTORIZABLE (vinfo
) = false;
2441 /* Analyze dependences. At this point all stmts not participating in
2442 vectorization have to be marked. Dependence analysis assumes
2443 that we either vectorize all SLP instances or none at all. */
2444 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo
))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2448 "not vectorized: unhandled data dependence "
2449 "in basic block.\n");
2451 destroy_bb_vec_info (bb_vinfo
);
2455 if (!vect_verify_datarefs_alignment (bb_vinfo
))
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2459 "not vectorized: unsupported alignment in basic "
2461 destroy_bb_vec_info (bb_vinfo
);
2465 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo
),
2466 BB_VINFO_TARGET_COST_DATA (bb_vinfo
)))
2468 if (dump_enabled_p ())
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2470 "not vectorized: bad operation in basic block.\n");
2472 destroy_bb_vec_info (bb_vinfo
);
2476 /* Cost model: check if the vectorization is worthwhile. */
2477 if (!unlimited_cost_model (NULL
)
2478 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2482 "not vectorized: vectorization is not "
2485 destroy_bb_vec_info (bb_vinfo
);
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_NOTE
, vect_location
,
2491 "Basic block will be vectorized using SLP\n");
2497 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2498 true if anything in the basic-block was vectorized. */
2501 vect_slp_bb (basic_block bb
)
2503 bb_vec_info bb_vinfo
;
2504 gimple_stmt_iterator gsi
;
2505 unsigned int vector_sizes
;
2506 bool any_vectorized
= false;
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2511 /* Autodetect first vector size we try. */
2512 current_vector_size
= 0;
2513 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2515 gsi
= gsi_start_bb (bb
);
2519 if (gsi_end_p (gsi
))
2522 gimple_stmt_iterator region_begin
= gsi
;
2523 vec
<data_reference_p
> datarefs
= vNULL
;
2526 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2528 gimple
*stmt
= gsi_stmt (gsi
);
2529 if (is_gimple_debug (stmt
))
2533 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
2534 vect_location
= gimple_location (stmt
);
2536 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
2540 /* Skip leading unhandled stmts. */
2541 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
2547 gimple_stmt_iterator region_end
= gsi
;
2549 bool vectorized
= false;
2550 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
2553 && dbg_cnt (vect_slp
))
2555 if (dump_enabled_p ())
2556 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
2558 vect_schedule_slp (bb_vinfo
);
2560 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_NOTE
, vect_location
,
2562 "basic block part vectorized\n");
2564 destroy_bb_vec_info (bb_vinfo
);
2569 destroy_bb_vec_info (bb_vinfo
);
2571 any_vectorized
|= vectorized
;
2573 vector_sizes
&= ~current_vector_size
;
2575 || vector_sizes
== 0
2576 || current_vector_size
== 0)
2578 if (gsi_end_p (region_end
))
2581 /* Skip the unhandled stmt. */
2584 /* And reset vector sizes. */
2585 current_vector_size
= 0;
2586 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2590 /* Try the next biggest vector size. */
2591 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_NOTE
, vect_location
,
2594 "***** Re-trying analysis with "
2595 "vector size %d\n", current_vector_size
);
2602 return any_vectorized
;
2606 /* For constant and loop invariant defs of SLP_NODE this function returns
2607 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2608 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2609 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2610 REDUC_INDEX is the index of the reduction operand in the statements, unless
2614 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2615 vec
<tree
> *vec_oprnds
,
2616 unsigned int op_num
, unsigned int number_of_vectors
,
2619 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2620 gimple
*stmt
= stmts
[0];
2621 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2625 unsigned j
, number_of_places_left_in_vector
;
2628 int group_size
= stmts
.length ();
2629 unsigned int vec_num
, i
;
2630 unsigned number_of_copies
= 1;
2632 voprnds
.create (number_of_vectors
);
2633 bool constant_p
, is_store
;
2634 tree neutral_op
= NULL
;
2635 enum tree_code code
= gimple_expr_code (stmt
);
2638 gimple_seq ctor_seq
= NULL
;
2640 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2641 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2643 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2644 && reduc_index
!= -1)
2646 op_num
= reduc_index
;
2647 op
= gimple_op (stmt
, op_num
+ 1);
2648 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2649 we need either neutral operands or the original operands. See
2650 get_initial_def_for_reduction() for details. */
2653 case WIDEN_SUM_EXPR
:
2660 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2661 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2663 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2668 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2669 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2671 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2676 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2679 /* For MIN/MAX we don't have an easy neutral operand but
2680 the initial values can be used fine here. Only for
2681 a reduction chain we have to force a neutral element. */
2684 if (!GROUP_FIRST_ELEMENT (stmt_vinfo
))
2688 def_stmt
= SSA_NAME_DEF_STMT (op
);
2689 loop
= (gimple_bb (stmt
))->loop_father
;
2690 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2691 loop_preheader_edge (loop
));
2696 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo
));
2701 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2704 op
= gimple_assign_rhs1 (stmt
);
2711 if (CONSTANT_CLASS_P (op
))
2716 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2717 created vectors. It is greater than 1 if unrolling is performed.
2719 For example, we have two scalar operands, s1 and s2 (e.g., group of
2720 strided accesses of size two), while NUNITS is four (i.e., four scalars
2721 of this type can be packed in a vector). The output vector will contain
2722 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2725 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2726 containing the operands.
2728 For example, NUNITS is four as before, and the group size is 8
2729 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2730 {s5, s6, s7, s8}. */
2732 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
2734 number_of_places_left_in_vector
= nunits
;
2735 elts
= XALLOCAVEC (tree
, nunits
);
2736 bool place_after_defs
= false;
2737 for (j
= 0; j
< number_of_copies
; j
++)
2739 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
2742 op
= gimple_assign_rhs1 (stmt
);
2748 if (op_num
== 0 || op_num
== 1)
2750 tree cond
= gimple_assign_rhs1 (stmt
);
2751 op
= TREE_OPERAND (cond
, op_num
);
2756 op
= gimple_assign_rhs2 (stmt
);
2758 op
= gimple_assign_rhs3 (stmt
);
2763 op
= gimple_call_arg (stmt
, op_num
);
2770 op
= gimple_op (stmt
, op_num
+ 1);
2771 /* Unlike the other binary operators, shifts/rotates have
2772 the shift count being int, instead of the same type as
2773 the lhs, so make sure the scalar is the right type if
2774 we are dealing with vectors of
2775 long long/long/short/char. */
2776 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
2777 op
= fold_convert (TREE_TYPE (vector_type
), op
);
2781 op
= gimple_op (stmt
, op_num
+ 1);
2786 if (reduc_index
!= -1)
2788 loop
= (gimple_bb (stmt
))->loop_father
;
2789 def_stmt
= SSA_NAME_DEF_STMT (op
);
2793 /* Get the def before the loop. In reduction chain we have only
2794 one initial value. */
2795 if ((j
!= (number_of_copies
- 1)
2796 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2801 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2802 loop_preheader_edge (loop
));
2805 /* Create 'vect_ = {op0,op1,...,opn}'. */
2806 number_of_places_left_in_vector
--;
2808 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
2810 if (CONSTANT_CLASS_P (op
))
2812 op
= fold_unary (VIEW_CONVERT_EXPR
,
2813 TREE_TYPE (vector_type
), op
);
2814 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
2818 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
2820 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
), op
);
2822 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
, op
);
2823 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
2827 elts
[number_of_places_left_in_vector
] = op
;
2828 if (!CONSTANT_CLASS_P (op
))
2830 if (TREE_CODE (orig_op
) == SSA_NAME
2831 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
2832 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
2833 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
2834 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
2835 place_after_defs
= true;
2837 if (number_of_places_left_in_vector
== 0)
2839 number_of_places_left_in_vector
= nunits
;
2842 vec_cst
= build_vector (vector_type
, elts
);
2845 vec
<constructor_elt
, va_gc
> *v
;
2847 vec_alloc (v
, nunits
);
2848 for (k
= 0; k
< nunits
; ++k
)
2849 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
2850 vec_cst
= build_constructor (vector_type
, v
);
2853 gimple_stmt_iterator gsi
;
2854 if (place_after_defs
)
2857 (vect_find_last_scalar_stmt_in_slp (slp_node
));
2858 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
2861 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
2862 if (ctor_seq
!= NULL
)
2864 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
2865 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
2869 voprnds
.quick_push (init
);
2870 place_after_defs
= false;
2875 /* Since the vectors are created in the reverse order, we should invert
2877 vec_num
= voprnds
.length ();
2878 for (j
= vec_num
; j
!= 0; j
--)
2880 vop
= voprnds
[j
- 1];
2881 vec_oprnds
->quick_push (vop
);
2886 /* In case that VF is greater than the unrolling factor needed for the SLP
2887 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2888 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2889 to replicate the vectors. */
2890 while (number_of_vectors
> vec_oprnds
->length ())
2892 tree neutral_vec
= NULL
;
2897 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2899 vec_oprnds
->quick_push (neutral_vec
);
2903 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
2904 vec_oprnds
->quick_push (vop
);
2910 /* Get vectorized definitions from SLP_NODE that contains corresponding
2911 vectorized def-stmts. */
2914 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
2917 gimple
*vec_def_stmt
;
2920 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
2922 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2924 gcc_assert (vec_def_stmt
);
2925 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2926 vec_oprnds
->quick_push (vec_oprnd
);
2931 /* Get vectorized definitions for SLP_NODE.
2932 If the scalar definitions are loop invariants or constants, collect them and
2933 call vect_get_constant_vectors() to create vector stmts.
2934 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2935 must be stored in the corresponding child of SLP_NODE, and we call
2936 vect_get_slp_vect_defs () to retrieve them. */
2939 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
2940 vec
<vec
<tree
> > *vec_oprnds
, int reduc_index
)
2943 int number_of_vects
= 0, i
;
2944 unsigned int child_index
= 0;
2945 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2946 slp_tree child
= NULL
;
2949 bool vectorized_defs
;
2951 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
2952 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
2954 /* For each operand we check if it has vectorized definitions in a child
2955 node or we need to create them (for invariants and constants). We
2956 check if the LHS of the first stmt of the next child matches OPRND.
2957 If it does, we found the correct child. Otherwise, we call
2958 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2959 to check this child node for the next operand. */
2960 vectorized_defs
= false;
2961 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
2963 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
2965 /* We have to check both pattern and original def, if available. */
2968 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
2970 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2972 if (operand_equal_p (oprnd
, gimple_get_lhs (first_def
), 0)
2974 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
2976 /* The number of vector defs is determined by the number of
2977 vector statements in the node from which we get those
2979 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2980 vectorized_defs
= true;
2988 if (!vectorized_defs
)
2992 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2993 /* Number of vector stmts was calculated according to LHS in
2994 vect_schedule_slp_instance (), fix it by replacing LHS with
2995 RHS, if necessary. See vect_get_smallest_scalar_type () for
2997 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2999 if (rhs_size_unit
!= lhs_size_unit
)
3001 number_of_vects
*= rhs_size_unit
;
3002 number_of_vects
/= lhs_size_unit
;
3007 /* Allocate memory for vectorized defs. */
3009 vec_defs
.create (number_of_vects
);
3011 /* For reduction defs we call vect_get_constant_vectors (), since we are
3012 looking for initial loop invariant values. */
3013 if (vectorized_defs
&& reduc_index
== -1)
3014 /* The defs are already vectorized. */
3015 vect_get_slp_vect_defs (child
, &vec_defs
);
3017 /* Build vectors from scalar defs. */
3018 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3019 number_of_vects
, reduc_index
);
3021 vec_oprnds
->quick_push (vec_defs
);
3023 /* For reductions, we only need initial values. */
3024 if (reduc_index
!= -1)
3030 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
3031 building a vector of type MASK_TYPE from it) and two input vectors placed in
3032 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3033 shifting by STRIDE elements of DR_CHAIN for every copy.
3034 (STRIDE is the number of vectorized stmts for NODE divided by the number of
3036 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3037 the created stmts must be inserted. */
3040 vect_create_mask_and_perm (gimple
*stmt
,
3041 tree mask
, int first_vec_indx
, int second_vec_indx
,
3042 gimple_stmt_iterator
*gsi
, slp_tree node
,
3043 tree vectype
, vec
<tree
> dr_chain
,
3044 int ncopies
, int vect_stmts_counter
)
3047 gimple
*perm_stmt
= NULL
;
3049 tree first_vec
, second_vec
, data_ref
;
3051 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
3053 /* Initialize the vect stmts of NODE to properly insert the generated
3055 for (i
= SLP_TREE_VEC_STMTS (node
).length ();
3056 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3057 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3059 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
3060 for (i
= 0; i
< ncopies
; i
++)
3062 first_vec
= dr_chain
[first_vec_indx
];
3063 second_vec
= dr_chain
[second_vec_indx
];
3065 /* Generate the permute statement. */
3066 perm_stmt
= gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3067 first_vec
, second_vec
, mask
);
3068 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3069 gimple_set_lhs (perm_stmt
, data_ref
);
3070 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3072 /* Store the vector statement in NODE. */
3073 SLP_TREE_VEC_STMTS (node
)[stride
* i
+ vect_stmts_counter
] = perm_stmt
;
3075 first_vec_indx
+= stride
;
3076 second_vec_indx
+= stride
;
3081 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
3082 return in CURRENT_MASK_ELEMENT its equivalent in target specific
3083 representation. Check that the mask is valid and return FALSE if not.
3084 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
3085 the next vector, i.e., the current first vector is not needed. */
3088 vect_get_mask_element (gimple
*stmt
, int first_mask_element
, int m
,
3089 int mask_nunits
, bool only_one_vec
, int index
,
3090 unsigned char *mask
, int *current_mask_element
,
3091 bool *need_next_vector
, int *number_of_mask_fixes
,
3092 bool *mask_fixed
, bool *needs_first_vector
)
3096 /* Convert to target specific representation. */
3097 *current_mask_element
= first_mask_element
+ m
;
3098 /* Adjust the value in case it's a mask for second and third vectors. */
3099 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
3101 if (*current_mask_element
< 0)
3103 if (dump_enabled_p ())
3105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3106 "permutation requires past vector ");
3107 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3108 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3113 if (*current_mask_element
< mask_nunits
)
3114 *needs_first_vector
= true;
3116 /* We have only one input vector to permute but the mask accesses values in
3117 the next vector as well. */
3118 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
3120 if (dump_enabled_p ())
3122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3123 "permutation requires at least two vectors ");
3124 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3125 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3131 /* The mask requires the next vector. */
3132 while (*current_mask_element
>= mask_nunits
* 2)
3134 if (*needs_first_vector
|| *mask_fixed
)
3136 /* We either need the first vector too or have already moved to the
3137 next vector. In both cases, this permutation needs three
3139 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3142 "permutation requires at "
3143 "least three vectors ");
3144 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3145 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3151 /* We move to the next vector, dropping the first one and working with
3152 the second and the third - we need to adjust the values of the mask
3154 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
3156 for (i
= 0; i
< index
; i
++)
3157 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
3159 (*number_of_mask_fixes
)++;
3163 *need_next_vector
= *mask_fixed
;
3165 /* This was the last element of this mask. Start a new one. */
3166 if (index
== mask_nunits
- 1)
3168 *number_of_mask_fixes
= 1;
3169 *mask_fixed
= false;
3170 *needs_first_vector
= false;
3177 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3178 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3179 permute statements for the SLP node NODE of the SLP instance
3180 SLP_NODE_INSTANCE. */
3183 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3184 gimple_stmt_iterator
*gsi
, int vf
,
3185 slp_instance slp_node_instance
, bool analyze_only
)
3187 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3188 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3189 tree mask_element_type
= NULL_TREE
, mask_type
;
3190 int i
, j
, k
, nunits
, vec_index
= 0;
3191 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3192 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3193 int first_mask_element
;
3194 int index
, unroll_factor
, current_mask_element
, ncopies
;
3195 unsigned char *mask
;
3196 bool only_one_vec
= false, need_next_vector
= false;
3197 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
3198 int number_of_mask_fixes
= 1;
3199 bool mask_fixed
= false;
3200 bool needs_first_vector
= false;
3203 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3206 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3208 mode
= TYPE_MODE (vectype
);
3210 if (!can_vec_perm_p (mode
, false, NULL
))
3212 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3215 "no vect permute for ");
3216 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3217 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3222 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3223 same size as the vector element being permuted. */
3224 mask_element_type
= lang_hooks
.types
.type_for_mode
3225 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3226 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3227 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3228 mask
= XALLOCAVEC (unsigned char, nunits
);
3229 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
3231 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3232 unrolling factor. */
3234 = (STMT_VINFO_GROUP_SIZE (stmt_info
)
3235 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
)
3236 + nunits
- 1) / nunits
;
3237 if (orig_vec_stmts_num
== 1)
3238 only_one_vec
= true;
3240 /* Number of copies is determined by the final vectorization factor
3241 relatively to SLP_NODE_INSTANCE unrolling factor. */
3242 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
3244 /* Generate permutation masks for every NODE. Number of masks for each NODE
3245 is equal to GROUP_SIZE.
3246 E.g., we have a group of three nodes with three loads from the same
3247 location in each node, and the vector size is 4. I.e., we have a
3248 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3249 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3250 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3253 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3254 The last mask is illegal since we assume two operands for permute
3255 operation, and the mask element values can't be outside that range.
3256 Hence, the last mask must be converted into {2,5,5,5}.
3257 For the first two permutations we need the first and the second input
3258 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3259 we need the second and the third vectors: {b1,c1,a2,b2} and
3264 vect_stmts_counter
= 0;
3266 first_vec_index
= vec_index
++;
3268 second_vec_index
= first_vec_index
;
3270 second_vec_index
= vec_index
++;
3272 for (j
= 0; j
< unroll_factor
; j
++)
3274 for (k
= 0; k
< group_size
; k
++)
3276 i
= SLP_TREE_LOAD_PERMUTATION (node
)[k
];
3277 first_mask_element
= i
+ j
* STMT_VINFO_GROUP_SIZE (stmt_info
);
3278 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
3279 nunits
, only_one_vec
, index
,
3280 mask
, ¤t_mask_element
,
3282 &number_of_mask_fixes
, &mask_fixed
,
3283 &needs_first_vector
))
3285 gcc_assert (current_mask_element
>= 0
3286 && current_mask_element
< 2 * nunits
);
3287 mask
[index
++] = current_mask_element
;
3289 if (index
== nunits
)
3292 if (!can_vec_perm_p (mode
, false, mask
))
3294 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3298 "unsupported vect permute { ");
3299 for (i
= 0; i
< nunits
; ++i
)
3300 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ",
3302 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3310 tree mask_vec
, *mask_elts
;
3311 mask_elts
= XALLOCAVEC (tree
, nunits
);
3312 for (l
= 0; l
< nunits
; ++l
)
3313 mask_elts
[l
] = build_int_cst (mask_element_type
,
3315 mask_vec
= build_vector (mask_type
, mask_elts
);
3317 if (need_next_vector
)
3319 first_vec_index
= second_vec_index
;
3320 second_vec_index
= vec_index
;
3323 vect_create_mask_and_perm (stmt
,
3324 mask_vec
, first_vec_index
, second_vec_index
,
3325 gsi
, node
, vectype
, dr_chain
,
3326 ncopies
, vect_stmts_counter
++);
3338 /* Vectorize SLP instance tree in postorder. */
3341 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3342 unsigned int vectorization_factor
)
3345 bool grouped_store
, is_store
;
3346 gimple_stmt_iterator si
;
3347 stmt_vec_info stmt_info
;
3348 unsigned int vec_stmts_size
, nunits
, group_size
;
3356 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3357 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3359 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3360 stmt_info
= vinfo_for_stmt (stmt
);
3362 /* VECTYPE is the type of the destination. */
3363 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3364 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3365 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3367 /* For each SLP instance calculate number of vector stmts to be created
3368 for the scalar stmts in each node of the SLP tree. Number of vector
3369 elements in one vector iteration is the number of scalar elements in
3370 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3372 Unless this is a SLP reduction in which case the number of vector
3373 stmts is equal to the number of vector stmts of the children. */
3374 if (GROUP_FIRST_ELEMENT (stmt_info
)
3375 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3376 vec_stmts_size
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
3378 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
3380 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3382 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3383 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3386 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_NOTE
,vect_location
,
3389 "------>vectorizing SLP node starting from: ");
3390 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3391 dump_printf (MSG_NOTE
, "\n");
3394 /* Vectorized stmts go before the last scalar stmt which is where
3395 all uses are ready. */
3396 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3398 /* Mark the first element of the reduction chain as reduction to properly
3399 transform the node. In the analysis phase only the last element of the
3400 chain is marked as reduction. */
3401 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3402 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3404 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3405 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3408 /* Handle two-operation SLP nodes by vectorizing the group with
3409 both operations and then performing a merge. */
3410 if (SLP_TREE_TWO_OPERATORS (node
))
3412 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3413 enum tree_code ocode
;
3415 unsigned char *mask
= XALLOCAVEC (unsigned char, group_size
);
3416 bool allsame
= true;
3417 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3418 if (gimple_assign_rhs_code (ostmt
) != code0
)
3422 ocode
= gimple_assign_rhs_code (ostmt
);
3431 tree tmask
= NULL_TREE
;
3432 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3433 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3434 SLP_TREE_VEC_STMTS (node
).truncate (0);
3435 gimple_assign_set_rhs_code (stmt
, ocode
);
3436 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3437 gimple_assign_set_rhs_code (stmt
, code0
);
3438 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3439 SLP_TREE_VEC_STMTS (node
).truncate (0);
3440 tree meltype
= build_nonstandard_integer_type
3441 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3442 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3444 for (j
= 0; j
< v0
.length (); ++j
)
3446 tree
*melts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (vectype
));
3447 for (l
= 0; l
< TYPE_VECTOR_SUBPARTS (vectype
); ++l
)
3449 if (k
>= group_size
)
3451 melts
[l
] = build_int_cst
3452 (meltype
, mask
[k
++] * TYPE_VECTOR_SUBPARTS (vectype
) + l
);
3454 tmask
= build_vector (mvectype
, melts
);
3456 /* ??? Not all targets support a VEC_PERM_EXPR with a
3457 constant mask that would translate to a vec_merge RTX
3458 (with their vec_perm_const_ok). We can either not
3459 vectorize in that case or let veclower do its job.
3460 Unfortunately that isn't too great and at least for
3461 plus/minus we'd eventually like to match targets
3462 vector addsub instructions. */
3464 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3466 gimple_assign_lhs (v0
[j
]),
3467 gimple_assign_lhs (v1
[j
]), tmask
);
3468 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3469 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3476 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3480 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3481 For loop vectorization this is done in vectorizable_call, but for SLP
3482 it needs to be deferred until end of vect_schedule_slp, because multiple
3483 SLP instances may refer to the same scalar stmt. */
3486 vect_remove_slp_scalar_calls (slp_tree node
)
3488 gimple
*stmt
, *new_stmt
;
3489 gimple_stmt_iterator gsi
;
3493 stmt_vec_info stmt_info
;
3498 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3499 vect_remove_slp_scalar_calls (child
);
3501 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3503 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3505 stmt_info
= vinfo_for_stmt (stmt
);
3506 if (stmt_info
== NULL
3507 || is_pattern_stmt_p (stmt_info
)
3508 || !PURE_SLP_STMT (stmt_info
))
3510 lhs
= gimple_call_lhs (stmt
);
3511 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3512 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3513 set_vinfo_for_stmt (stmt
, NULL
);
3514 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3515 gsi
= gsi_for_stmt (stmt
);
3516 gsi_replace (&gsi
, new_stmt
, false);
3517 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3521 /* Generate vector code for all SLP instances in the loop/basic block. */
3524 vect_schedule_slp (vec_info
*vinfo
)
3526 vec
<slp_instance
> slp_instances
;
3527 slp_instance instance
;
3529 bool is_store
= false;
3531 slp_instances
= vinfo
->slp_instances
;
3532 if (is_a
<loop_vec_info
> (vinfo
))
3533 vf
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
3537 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3539 /* Schedule the tree of INSTANCE. */
3540 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_NOTE
, vect_location
,
3544 "vectorizing stmts using SLP.\n");
3547 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3549 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3552 gimple_stmt_iterator gsi
;
3554 /* Remove scalar call stmts. Do not do this for basic-block
3555 vectorization as not all uses may be vectorized.
3556 ??? Why should this be necessary? DCE should be able to
3557 remove the stmts itself.
3558 ??? For BB vectorization we can as well remove scalar
3559 stmts starting from the SLP tree root if they have no
3561 if (is_a
<loop_vec_info
> (vinfo
))
3562 vect_remove_slp_scalar_calls (root
);
3564 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3565 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3567 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3570 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3571 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3572 /* Free the attached stmt_vec_info and remove the stmt. */
3573 gsi
= gsi_for_stmt (store
);
3574 unlink_stmt_vdef (store
);
3575 gsi_remove (&gsi
, true);
3576 release_defs (store
);
3577 free_stmt_vec_info (store
);