/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
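/* Illustrative sketch (not part of the original sources): basic-block SLP
   packs isomorphic scalar statements such as

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   into a single vector add feeding a single vector store, with no
   enclosing loop required.  */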
/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
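/* Illustration (not part of the original sources): if the interleaving
   chain is a[0] -> a[1] -> a[2] -> a[3], the statement accessing a[2] has
   place 2 in the chain that starts at the access to a[0].  */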
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types ");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}
/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation, and are not of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}
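/* Illustration (not part of the original sources): for a group of four
   scalar stmts {+, +, -, +}, the third stmt fails the isomorphism check,
   so the caller sees MATCHES = {true, true, false, true} and a false
   return value.  */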
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                        stmt, (i == 0), &oprnds_info))
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches, npermutes))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of not successful permutes was not more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
              }
          /* And try again ...  */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (i = 0; i < group_size; ++i)
            dump_printf (MSG_NOTE, "%d ", i);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}
/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != (int) i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}
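/* Worked example (illustration only): with NUNITS 4 and GROUP_SIZE 6,
   least_common_multiple (4, 6) is 12, so NCOPIES_FOR_COST is 12 / 4 = 3
   vector stmts per scalar stmt position.  */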
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, NULL, NULL))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = stmts[0];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                      == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts (child);
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));

  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, child))
      return false;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          slp_instances.ordered_remove (i);
        }
      else
        i++;
    }

  if (!slp_instances.length ())
    return false;

  return true;
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
        {
          stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
          (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
                                stmt_info, ci->misalign, vect_body);
        }
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  vect_pattern_recog (NULL, bb_vinfo);

  if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data dependence "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad data alignment in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: failed to find SLP opportunities "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported alignment in basic "
                         "block.\n");
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: too many instructions in "
                         "basic block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "***** Re-trying analysis with "
                         "vector size %d\n", current_vector_size);
    }
}
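/* Illustration (not part of the original sources): on a target advertising
   32-byte and 16-byte vectors, a failed analysis at the autodetected
   (largest) size clears that size from VECTOR_SIZES and retries at the
   next biggest remaining size, e.g. 32 bytes first, then 16 bytes.  */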
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *si;
  void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* We assume that costs are linear in ncopies.  */
      int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Record the instance's instructions in the target cost model.
         This was delayed until here because the count of instructions
         isn't known beforehand.  */
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, si)
        (void) add_stmt_cost (data, si->count * ncopies, si->kind,
                              vinfo_for_stmt (si->stmt), si->misalign,
                              vect_body);
    }
}
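/* Worked example (illustration only): with VF 8 and an instance unrolling
   factor of 4, NCOPIES is 8 / 4 = 2, so each recorded body cost entry is
   scaled by 2 before being added to the target cost model.  */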

/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;
  gimple_seq ctor_seq = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);
            break;

          case MULT_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);
            break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
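
  /* Illustrative arithmetic, with hypothetical numbers: for nunits == 4 and
     group_size == 6, least_common_multiple (4, 6) == 12, hence
     number_of_copies == 12 / 6 == 2, and the twelve operand slots fill
     exactly 12 / 4 == 3 vectors with no partially filled vector left.  */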

  number_of_places_left_in_vector = nunits;
  elts = XALLOCAVEC (tree, nunits);
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                  case COND_EXPR:
                    if (op_num == 0 || op_num == 1)
                      {
                        tree cond = gimple_assign_rhs1 (stmt);
                        op = TREE_OPERAND (cond, op_num);
                      }
                    else if (op_num == 2)
                      op = gimple_assign_rhs2 (stmt);
                    else
                      op = gimple_assign_rhs3 (stmt);
                    break;

                  case CALL_EXPR:
                    op = gimple_call_arg (stmt, op_num);
                    break;

                  case LSHIFT_EXPR:
                  case RSHIFT_EXPR:
                  case LROTATE_EXPR:
                  case RROTATE_EXPR:
                    op = gimple_op (stmt, op_num + 1);
                    /* Unlike the other binary operators, shifts/rotates have
                       the shift count being int, instead of the same type as
                       the lhs, so make sure the scalar is the right type if
                       we are dealing with vectors of
                       long long/long/short/char.  */
                    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                      op = fold_convert (TREE_TYPE (vector_type), op);
                    break;

                  default:
                    op = gimple_op (stmt, op_num + 1);
                    break;
                }
            }

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  op = fold_unary (VIEW_CONVERT_EXPR,
                                   TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp
                    = make_ssa_name (TREE_TYPE (vector_type), NULL);
                  gimple init_stmt;
                  op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                               op);
                  init_stmt
                    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
                                                    new_temp, op, NULL_TREE);
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, elts);
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              voprnds.quick_push (vect_init_vector (stmt, vec_cst,
                                                    vector_type, NULL));
              if (ctor_seq != NULL)
                {
                  gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
                  gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
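
/* Illustrative note, with hypothetical numbers: if NUMBER_OF_VECTORS is 4
   but only 2 vectors were built (VF larger than the SLP unrolling factor),
   the final loop above pushes defs until 4 are present: the neutral vector
   (e.g. {0, 0, 0, 0} for a PLUS_EXPR reduction) when a neutral operand
   exists, otherwise the already-created vectors repeated in order.  */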

/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds, int reduc_index)
{
  gimple first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors (), and not advance CHILD_INDEX in order
         to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
          gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

          if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
              || (related
                  && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
            {
              /* The number of vector defs is determined by the number of
                 vector statements in the node from which we get those
                 statements.  */
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
              vectorized_defs = true;
              child_index++;
            }
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects, reduc_index);

      vec_oprnds->quick_push (vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
        return;
    }
}
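
/* Illustrative walk-through on hypothetical GIMPLE: for a node whose scalar
   stmts are "a_1 = b_1 + 5" and "a_2 = b_2 + 5", operand b_1 matches the LHS
   of the first scalar stmt of a child node, so its vectorized defs are
   fetched with vect_get_slp_vect_defs (), whereas the invariant 5 matches no
   child and is materialized by vect_get_constant_vectors () as {5, 5, ...}.  */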

/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
                                                first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
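
/* Illustrative note, with hypothetical numbers: if NODE has 6 vector stmts
   and ncopies == 2, then stride == 3; copy 0 permutes the dr_chain entries
   at FIRST_VEC_INDX and SECOND_VEC_INDX into slot VECT_STMTS_COUNTER, and
   copy 1 shifts both indices by 3 and fills slot 3 + VECT_STMTS_COUNTER.  */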

/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "permutation requires at least two vectors ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "permutation requires at "
                               "least three vectors ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
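
/* Worked example, with hypothetical numbers: for mask_nunits == 4,
   number_of_mask_fixes == 1 and first_mask_element == 9 (m == 0), the
   element stays 9 after the initial adjustment; since 9 >= 2 * 4 and the
   first vector is not otherwise needed, the mask is shifted to the second
   and third input vectors: 9 - 4 == 5, all earlier mask entries drop by 4
   as well, and number_of_mask_fixes becomes 2.  */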

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vect permute for ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     for c's: c0c0c0c1 c1c1c2c2 c2c3c3c3

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  index = 0;
  scalar_index = 0;
  vect_stmts_counter = 0;
  vec_index = 0;
  first_vec_index = vec_index++;
  if (only_one_vec)
    second_vec_index = first_vec_index;
  else
    second_vec_index = vec_index++;

  for (j = 0; j < unroll_factor; j++)
    {
      for (k = 0; k < group_size; k++)
        {
          i = SLP_TREE_LOAD_PERMUTATION (node)[k];
          first_mask_element = i + j * group_size;
          if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                      nunits, only_one_vec, index,
                                      mask, &current_mask_element,
                                      &need_next_vector,
                                      &number_of_mask_fixes, &mask_fixed,
                                      &needs_first_vector))
            return false;
          mask[index++] = current_mask_element;

          if (index == nunits)
            {
              index = 0;
              if (!can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
                                     mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  return false;
                }

              if (!analyze_only)
                {
                  int l;
                  tree mask_vec, *mask_elts;
                  mask_elts = XALLOCAVEC (tree, nunits);
                  for (l = 0; l < nunits; ++l)
                    mask_elts[l] = build_int_cst (mask_element_type,
                                                  mask[l]);
                  mask_vec = build_vector (mask_type, mask_elts);

                  if (need_next_vector)
                    {
                      first_vec_index = second_vec_index;
                      second_vec_index = vec_index;
                    }

                  next_scalar_stmt
                    = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];

                  vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                             mask_vec, first_vec_index,
                                             second_vec_index,
                                             gsi, node, vectype, dr_chain,
                                             ncopies, vect_stmts_counter++);
                }
            }
        }
    }

  return true;
}

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree child;

  if (!node)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
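
  /* Illustrative arithmetic, with hypothetical numbers: for
     vectorization_factor == 4, group_size == 2 and nunits == 4, one vector
     holds four scalar elements while 4 * 2 == 8 are produced per vector
     iteration, giving vec_stmts_size == 8 / 4 == 2 vector stmts for the
     node.  */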

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_TREE_LOAD_PERMUTATION (node).exists ())
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
        last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple stmt, new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
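
/* Illustrative example on hypothetical GIMPLE: a scalar call such as
   "x_1 = sqrtf (a_2)" is rewritten by the loop above to "x_1 = 0.0"; the
   zero is just a placeholder, since after vectorization the scalar result
   is expected to be dead and later cleanup removes the assignment.  */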

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (loop_vinfo)
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}

/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "------>SLPing statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}