/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
33 #include "gimple-iterator.h"
34 #include "gimple-ssa.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-pass.h"
41 #include "recog.h" /* FIXME: for insn_data */
43 #include "tree-vectorizer.h"
44 #include "langhooks.h"
46 /* Extract the location of the basic block in the source code.
47 Return the basic block location if succeed and NULL if not. */
50 find_bb_location (basic_block bb
)
53 gimple_stmt_iterator si
;
58 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
61 if (gimple_location (stmt
) != UNKNOWN_LOC
)
62 return gimple_location (stmt
);
69 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
72 vect_free_slp_tree (slp_tree node
)
80 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
81 vect_free_slp_tree (child
);
83 SLP_TREE_CHILDREN (node
).release ();
84 SLP_TREE_SCALAR_STMTS (node
).release ();
85 SLP_TREE_VEC_STMTS (node
).release ();
86 SLP_TREE_LOAD_PERMUTATION (node
).release ();
92 /* Free the memory allocated for the SLP instance. */
95 vect_free_slp_instance (slp_instance instance
)
97 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
98 SLP_INSTANCE_LOADS (instance
).release ();
99 SLP_INSTANCE_BODY_COST_VEC (instance
).release ();
104 /* Create an SLP node for SCALAR_STMTS. */
107 vect_create_new_slp_node (vec
<gimple
> scalar_stmts
)
110 gimple stmt
= scalar_stmts
[0];
113 if (is_gimple_call (stmt
))
114 nops
= gimple_call_num_args (stmt
);
115 else if (is_gimple_assign (stmt
))
117 nops
= gimple_num_ops (stmt
) - 1;
118 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
124 node
= XNEW (struct _slp_tree
);
125 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
126 SLP_TREE_VEC_STMTS (node
).create (0);
127 SLP_TREE_CHILDREN (node
).create (nops
);
128 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
134 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
136 static vec
<slp_oprnd_info
>
137 vect_create_oprnd_info (int nops
, int group_size
)
140 slp_oprnd_info oprnd_info
;
141 vec
<slp_oprnd_info
> oprnds_info
;
143 oprnds_info
.create (nops
);
144 for (i
= 0; i
< nops
; i
++)
146 oprnd_info
= XNEW (struct _slp_oprnd_info
);
147 oprnd_info
->def_stmts
.create (group_size
);
148 oprnd_info
->first_dt
= vect_uninitialized_def
;
149 oprnd_info
->first_op_type
= NULL_TREE
;
150 oprnd_info
->first_pattern
= false;
151 oprnds_info
.quick_push (oprnd_info
);
158 /* Free operands info. */
161 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
164 slp_oprnd_info oprnd_info
;
166 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
168 oprnd_info
->def_stmts
.release ();
169 XDELETE (oprnd_info
);
172 oprnds_info
.release ();
176 /* Find the place of the data-ref in STMT in the interleaving chain that starts
177 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
180 vect_get_place_in_interleaving_chain (gimple stmt
, gimple first_stmt
)
182 gimple next_stmt
= first_stmt
;
185 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
190 if (next_stmt
== stmt
)
193 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
201 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
202 they are of a valid type and that they match the defs of the first stmt of
203 the SLP group (stored in OPRNDS_INFO). */
206 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
207 gimple stmt
, bool first
,
208 vec
<slp_oprnd_info
> *oprnds_info
)
211 unsigned int i
, number_of_oprnds
;
214 enum vect_def_type dt
= vect_uninitialized_def
;
215 struct loop
*loop
= NULL
;
216 bool pattern
= false;
217 slp_oprnd_info oprnd_info
;
219 tree compare_rhs
= NULL_TREE
;
222 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
224 if (is_gimple_call (stmt
))
226 number_of_oprnds
= gimple_call_num_args (stmt
);
229 else if (is_gimple_assign (stmt
))
231 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
232 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
238 for (i
= 0; i
< number_of_oprnds
; i
++)
243 compare_rhs
= NULL_TREE
;
246 oprnd
= gimple_op (stmt
, op_idx
++);
248 oprnd_info
= (*oprnds_info
)[i
];
250 if (COMPARISON_CLASS_P (oprnd
))
252 compare_rhs
= TREE_OPERAND (oprnd
, 1);
253 oprnd
= TREE_OPERAND (oprnd
, 0);
256 if (!vect_is_simple_use (oprnd
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
258 || (!def_stmt
&& dt
!= vect_constant_def
))
260 if (dump_enabled_p ())
262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
263 "Build SLP failed: can't find def for ");
264 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
265 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
271 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
272 from the pattern. Check that all the stmts of the node are in the
274 if (def_stmt
&& gimple_bb (def_stmt
)
275 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
276 || (!loop
&& gimple_bb (def_stmt
) == BB_VINFO_BB (bb_vinfo
)
277 && gimple_code (def_stmt
) != GIMPLE_PHI
))
278 && vinfo_for_stmt (def_stmt
)
279 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
280 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
281 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
284 if (!first
&& !oprnd_info
->first_pattern
)
286 if (dump_enabled_p ())
288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
289 "Build SLP failed: some of the stmts"
290 " are in a pattern, and others are not ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
292 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
298 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
299 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
301 if (dt
== vect_unknown_def_type
)
303 if (dump_enabled_p ())
304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
305 "Unsupported pattern.\n");
309 switch (gimple_code (def_stmt
))
312 def
= gimple_phi_result (def_stmt
);
316 def
= gimple_assign_lhs (def_stmt
);
320 if (dump_enabled_p ())
321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
322 "unsupported defining stmt:\n");
329 oprnd_info
->first_dt
= dt
;
330 oprnd_info
->first_pattern
= pattern
;
331 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
335 /* Not first stmt of the group, check that the def-stmt/s match
336 the def-stmt/s of the first stmt. Allow different definition
337 types for reduction chains: the first stmt must be a
338 vect_reduction_def (a phi node), and the rest
339 vect_internal_def. */
340 if (((oprnd_info
->first_dt
!= dt
341 && !(oprnd_info
->first_dt
== vect_reduction_def
342 && dt
== vect_internal_def
)
343 && !((oprnd_info
->first_dt
== vect_external_def
344 || oprnd_info
->first_dt
== vect_constant_def
)
345 && (dt
== vect_external_def
346 || dt
== vect_constant_def
)))
347 || !types_compatible_p (oprnd_info
->first_op_type
,
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
352 "Build SLP failed: different types\n");
358 /* Check the types of the definitions. */
361 case vect_constant_def
:
362 case vect_external_def
:
363 case vect_reduction_def
:
366 case vect_internal_def
:
367 oprnd_info
->def_stmts
.quick_push (def_stmt
);
371 /* FORNOW: Not supported. */
372 if (dump_enabled_p ())
374 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
375 "Build SLP failed: illegal type of def ");
376 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, def
);
377 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
388 /* Verify if the scalar stmts STMTS are isomorphic, require data
389 permutation or are of unsupported types of operation. Return
390 true if they are, otherwise return false and indicate in *MATCHES
391 which stmts are not isomorphic to the first one. If MATCHES[0]
392 is false then this indicates the comparison could not be
393 carried out or the stmts will never be vectorized by SLP. */
396 vect_build_slp_tree_1 (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
397 vec
<gimple
> stmts
, unsigned int group_size
,
398 unsigned nops
, unsigned int *max_nunits
,
399 unsigned int vectorization_factor
, bool *matches
)
402 gimple stmt
= stmts
[0];
403 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
404 enum tree_code first_cond_code
= ERROR_MARK
;
406 bool need_same_oprnds
= false;
407 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
410 enum machine_mode optab_op2_mode
;
411 enum machine_mode vec_mode
;
412 struct data_reference
*first_dr
;
414 gimple first_load
= NULL
, prev_first_load
= NULL
, old_first_load
= NULL
;
417 /* For every stmt in NODE find its def stmt/s. */
418 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
422 if (dump_enabled_p ())
424 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
425 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
426 dump_printf (MSG_NOTE
, "\n");
429 /* Fail to vectorize statements marked as unvectorizable. */
430 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
432 if (dump_enabled_p ())
434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
435 "Build SLP failed: unvectorizable statement ");
436 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
437 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
439 /* Fatal mismatch. */
444 lhs
= gimple_get_lhs (stmt
);
445 if (lhs
== NULL_TREE
)
447 if (dump_enabled_p ())
449 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
450 "Build SLP failed: not GIMPLE_ASSIGN nor "
452 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
453 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
455 /* Fatal mismatch. */
460 if (is_gimple_assign (stmt
)
461 && gimple_assign_rhs_code (stmt
) == COND_EXPR
462 && (cond
= gimple_assign_rhs1 (stmt
))
463 && !COMPARISON_CLASS_P (cond
))
465 if (dump_enabled_p ())
467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
468 "Build SLP failed: condition is not "
470 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
471 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
473 /* Fatal mismatch. */
478 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
479 vectype
= get_vectype_for_scalar_type (scalar_type
);
482 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
485 "Build SLP failed: unsupported data-type ");
486 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
488 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
490 /* Fatal mismatch. */
495 /* In case of multiple types we need to detect the smallest type. */
496 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
498 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
500 vectorization_factor
= *max_nunits
;
503 if (is_gimple_call (stmt
))
505 rhs_code
= CALL_EXPR
;
506 if (gimple_call_internal_p (stmt
)
507 || gimple_call_tail_p (stmt
)
508 || gimple_call_noreturn_p (stmt
)
509 || !gimple_call_nothrow_p (stmt
)
510 || gimple_call_chain (stmt
))
512 if (dump_enabled_p ())
514 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
515 "Build SLP failed: unsupported call type ");
516 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
517 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
519 /* Fatal mismatch. */
525 rhs_code
= gimple_assign_rhs_code (stmt
);
527 /* Check the operation. */
530 first_stmt_code
= rhs_code
;
532 /* Shift arguments should be equal in all the packed stmts for a
533 vector shift with scalar shift operand. */
534 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
535 || rhs_code
== LROTATE_EXPR
536 || rhs_code
== RROTATE_EXPR
)
538 vec_mode
= TYPE_MODE (vectype
);
540 /* First see if we have a vector/vector shift. */
541 optab
= optab_for_tree_code (rhs_code
, vectype
,
545 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
547 /* No vector/vector shift, try for a vector/scalar shift. */
548 optab
= optab_for_tree_code (rhs_code
, vectype
,
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
555 "Build SLP failed: no optab.\n");
556 /* Fatal mismatch. */
560 icode
= (int) optab_handler (optab
, vec_mode
);
561 if (icode
== CODE_FOR_nothing
)
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
566 "op not supported by target.\n");
567 /* Fatal mismatch. */
571 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
572 if (!VECTOR_MODE_P (optab_op2_mode
))
574 need_same_oprnds
= true;
575 first_op1
= gimple_assign_rhs2 (stmt
);
579 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
581 need_same_oprnds
= true;
582 first_op1
= gimple_assign_rhs2 (stmt
);
587 if (first_stmt_code
!= rhs_code
588 && (first_stmt_code
!= IMAGPART_EXPR
589 || rhs_code
!= REALPART_EXPR
)
590 && (first_stmt_code
!= REALPART_EXPR
591 || rhs_code
!= IMAGPART_EXPR
)
592 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
593 && (first_stmt_code
== ARRAY_REF
594 || first_stmt_code
== BIT_FIELD_REF
595 || first_stmt_code
== INDIRECT_REF
596 || first_stmt_code
== COMPONENT_REF
597 || first_stmt_code
== MEM_REF
)))
599 if (dump_enabled_p ())
601 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
602 "Build SLP failed: different operation "
604 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
605 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
612 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
614 if (dump_enabled_p ())
616 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
617 "Build SLP failed: different shift "
619 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
620 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
626 if (rhs_code
== CALL_EXPR
)
628 gimple first_stmt
= stmts
[0];
629 if (gimple_call_num_args (stmt
) != nops
630 || !operand_equal_p (gimple_call_fn (first_stmt
),
631 gimple_call_fn (stmt
), 0)
632 || gimple_call_fntype (first_stmt
)
633 != gimple_call_fntype (stmt
))
635 if (dump_enabled_p ())
637 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
638 "Build SLP failed: different calls in ");
639 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
641 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
649 /* Grouped store or load. */
650 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
652 if (REFERENCE_CLASS_P (lhs
))
660 unsigned unrolling_factor
661 = least_common_multiple
662 (*max_nunits
, group_size
) / group_size
;
663 /* FORNOW: Check that there is no gap between the loads
664 and no gap between the groups when we need to load
665 multiple groups at once.
666 ??? We should enhance this to only disallow gaps
668 if ((unrolling_factor
> 1
669 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
670 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
671 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
672 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
674 if (dump_enabled_p ())
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
677 "Build SLP failed: grouped "
679 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
681 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
683 /* Fatal mismatch. */
688 /* Check that the size of interleaved loads group is not
689 greater than the SLP group size. */
691 = vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
693 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
694 && ((GROUP_SIZE (vinfo_for_stmt (stmt
))
695 - GROUP_GAP (vinfo_for_stmt (stmt
)))
696 > ncopies
* group_size
))
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
701 "Build SLP failed: the number "
702 "of interleaved loads is greater than "
703 "the SLP group size ");
704 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
706 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
708 /* Fatal mismatch. */
713 old_first_load
= first_load
;
714 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
717 /* Check that there are no loads from different interleaving
718 chains in the same node. */
719 if (prev_first_load
!= first_load
)
721 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
725 "Build SLP failed: different "
726 "interleaving chains in one node ");
727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
729 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
736 prev_first_load
= first_load
;
738 /* In some cases a group of loads is just the same load
739 repeated N times. Only analyze its cost once. */
740 if (first_load
== stmt
&& old_first_load
!= first_load
)
742 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
743 if (vect_supportable_dr_alignment (first_dr
, false)
744 == dr_unaligned_unsupported
)
746 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
750 "Build SLP failed: unsupported "
752 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
754 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
756 /* Fatal mismatch. */
762 } /* Grouped access. */
765 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
767 /* Not grouped load. */
768 if (dump_enabled_p ())
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
771 "Build SLP failed: not grouped load ");
772 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
773 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
776 /* FORNOW: Not grouped loads are not supported. */
777 /* Fatal mismatch. */
782 /* Not memory operation. */
783 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
784 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
785 && rhs_code
!= COND_EXPR
786 && rhs_code
!= CALL_EXPR
)
788 if (dump_enabled_p ())
790 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
791 "Build SLP failed: operation");
792 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
793 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
794 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
796 /* Fatal mismatch. */
801 if (rhs_code
== COND_EXPR
)
803 tree cond_expr
= gimple_assign_rhs1 (stmt
);
806 first_cond_code
= TREE_CODE (cond_expr
);
807 else if (first_cond_code
!= TREE_CODE (cond_expr
))
809 if (dump_enabled_p ())
811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
812 "Build SLP failed: different"
814 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
816 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
827 for (i
= 0; i
< group_size
; ++i
)
834 /* Recursively build an SLP tree starting from NODE.
835 Fail (and return a value not equal to zero) if def-stmts are not
836 isomorphic, require data permutation or are of unsupported types of
837 operation. Otherwise, return 0.
838 The value returned is the depth in the SLP tree where a mismatch
842 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
843 slp_tree
*node
, unsigned int group_size
,
844 unsigned int *max_nunits
,
845 vec
<slp_tree
> *loads
,
846 unsigned int vectorization_factor
,
847 bool *matches
, unsigned *npermutes
)
849 unsigned nops
, i
, this_npermutes
= 0;
853 matches
= XALLOCAVEC (bool, group_size
);
855 npermutes
= &this_npermutes
;
859 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
860 if (is_gimple_call (stmt
))
861 nops
= gimple_call_num_args (stmt
);
862 else if (is_gimple_assign (stmt
))
864 nops
= gimple_num_ops (stmt
) - 1;
865 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
871 if (!vect_build_slp_tree_1 (loop_vinfo
, bb_vinfo
,
872 SLP_TREE_SCALAR_STMTS (*node
), group_size
, nops
,
873 max_nunits
, vectorization_factor
, matches
))
876 /* If the SLP node is a load, terminate the recursion. */
877 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
878 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
880 loads
->safe_push (*node
);
884 /* Get at the operands, verifying they are compatible. */
885 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
886 slp_oprnd_info oprnd_info
;
887 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node
), i
, stmt
)
889 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
,
890 stmt
, (i
== 0), &oprnds_info
))
892 vect_free_oprnd_info (oprnds_info
);
897 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
899 /* Create SLP_TREE nodes for the definition node/s. */
900 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
903 unsigned old_nloads
= loads
->length ();
904 unsigned old_max_nunits
= *max_nunits
;
906 if (oprnd_info
->first_dt
!= vect_internal_def
)
909 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
912 vect_free_oprnd_info (oprnds_info
);
916 bool *matches
= XALLOCAVEC (bool, group_size
);
917 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
,
918 group_size
, max_nunits
, loads
,
919 vectorization_factor
, matches
, npermutes
))
921 oprnd_info
->def_stmts
= vNULL
;
922 SLP_TREE_CHILDREN (*node
).quick_push (child
);
926 /* If the SLP build for operand zero failed and operand zero
927 and one can be commutated try that for the scalar stmts
928 that failed the match. */
930 /* A first scalar stmt mismatch signals a fatal mismatch. */
932 /* ??? For COND_EXPRs we can swap the comparison operands
933 as well as the arms under some constraints. */
935 && oprnds_info
[1]->first_dt
== vect_internal_def
936 && is_gimple_assign (stmt
)
937 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
938 /* Do so only if the number of not successful permutes was nor more
939 than a cut-ff as re-trying the recursive match on
940 possibly each level of the tree would expose exponential
945 *max_nunits
= old_max_nunits
;
946 loads
->truncate (old_nloads
);
947 /* Swap mismatched definition stmts. */
948 for (unsigned j
= 0; j
< group_size
; ++j
)
951 gimple tem
= oprnds_info
[0]->def_stmts
[j
];
952 oprnds_info
[0]->def_stmts
[j
] = oprnds_info
[1]->def_stmts
[j
];
953 oprnds_info
[1]->def_stmts
[j
] = tem
;
955 /* And try again ... */
956 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
,
957 group_size
, max_nunits
, loads
,
958 vectorization_factor
,
961 oprnd_info
->def_stmts
= vNULL
;
962 SLP_TREE_CHILDREN (*node
).quick_push (child
);
969 oprnd_info
->def_stmts
= vNULL
;
970 vect_free_slp_tree (child
);
971 vect_free_oprnd_info (oprnds_info
);
975 vect_free_oprnd_info (oprnds_info
);
979 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
982 vect_print_slp_tree (int dump_kind
, slp_tree node
)
991 dump_printf (dump_kind
, "node ");
992 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
994 dump_printf (dump_kind
, "\n\tstmt %d ", i
);
995 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
997 dump_printf (dump_kind
, "\n");
999 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1000 vect_print_slp_tree (dump_kind
, child
);
1004 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1005 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1006 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1007 stmts in NODE are to be marked. */
1010 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1019 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1020 if (j
< 0 || i
== j
)
1021 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1023 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1024 vect_mark_slp_stmts (child
, mark
, j
);
1028 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1031 vect_mark_slp_stmts_relevant (slp_tree node
)
1035 stmt_vec_info stmt_info
;
1041 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1043 stmt_info
= vinfo_for_stmt (stmt
);
1044 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1045 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1046 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1049 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1050 vect_mark_slp_stmts_relevant (child
);
1054 /* Rearrange the statements of NODE according to PERMUTATION. */
1057 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1058 vec
<unsigned> permutation
)
1061 vec
<gimple
> tmp_stmts
;
1065 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1066 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1068 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1069 tmp_stmts
.create (group_size
);
1070 tmp_stmts
.quick_grow_cleared (group_size
);
1072 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1073 tmp_stmts
[permutation
[i
]] = stmt
;
1075 SLP_TREE_SCALAR_STMTS (node
).release ();
1076 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1080 /* Check if the required load permutations in the SLP instance
1081 SLP_INSTN are supported. */
1084 vect_supported_load_permutation_p (slp_instance slp_instn
)
1086 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1087 unsigned int i
, j
, k
, next
;
1090 gimple stmt
, load
, next_load
, first_load
;
1091 struct data_reference
*dr
;
1093 if (dump_enabled_p ())
1095 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1096 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1097 if (node
->load_permutation
.exists ())
1098 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1099 dump_printf (MSG_NOTE
, "%d ", next
);
1101 for (i
= 0; i
< group_size
; ++i
)
1102 dump_printf (MSG_NOTE
, "%d ", i
);
1103 dump_printf (MSG_NOTE
, "\n");
1106 /* In case of reduction every load permutation is allowed, since the order
1107 of the reduction statements is not important (as opposed to the case of
1108 grouped stores). The only condition we need to check is that all the
1109 load nodes are of the same size and have the same permutation (and then
1110 rearrange all the nodes of the SLP instance according to this
1113 /* Check that all the load nodes are of the same size. */
1114 /* ??? Can't we assert this? */
1115 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1116 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1119 node
= SLP_INSTANCE_TREE (slp_instn
);
1120 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1122 /* Reduction (there are no data-refs in the root).
1123 In reduction chain the order of the loads is important. */
1124 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1125 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1130 /* Compare all the permutation sequences to the first one. We know
1131 that at least one load is permuted. */
1132 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1133 if (!node
->load_permutation
.exists ())
1135 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1137 if (!load
->load_permutation
.exists ())
1139 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1140 if (lidx
!= node
->load_permutation
[j
])
1144 /* Check that the loads in the first sequence are different and there
1145 are no gaps between them. */
1146 load_index
= sbitmap_alloc (group_size
);
1147 bitmap_clear (load_index
);
1148 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1150 if (bitmap_bit_p (load_index
, lidx
))
1152 sbitmap_free (load_index
);
1155 bitmap_set_bit (load_index
, lidx
);
1157 for (i
= 0; i
< group_size
; i
++)
1158 if (!bitmap_bit_p (load_index
, i
))
1160 sbitmap_free (load_index
);
1163 sbitmap_free (load_index
);
1165 /* This permutation is valid for reduction. Since the order of the
1166 statements in the nodes is not important unless they are memory
1167 accesses, we can rearrange the statements in all the nodes
1168 according to the order of the loads. */
1169 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1170 node
->load_permutation
);
1172 /* We are done, no actual permutations need to be generated. */
1173 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1174 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1178 /* In basic block vectorization we allow any subchain of an interleaving
1180 FORNOW: not supported in loop SLP because of realignment compications. */
1181 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1183 /* Check that for every node in the instance the loads
1185 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1188 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1190 if (j
!= 0 && next_load
!= load
)
1192 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1196 /* Check that the alignment of the first load in every subchain, i.e.,
1197 the first statement in every load node, is supported.
1198 ??? This belongs in alignment checking. */
1199 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1201 first_load
= SLP_TREE_SCALAR_STMTS (node
)[0];
1202 if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1204 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1205 if (vect_supportable_dr_alignment (dr
, false)
1206 == dr_unaligned_unsupported
)
1208 if (dump_enabled_p ())
1210 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1212 "unsupported unaligned load ");
1213 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1215 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1222 /* We are done, no actual permutations need to be generated. */
1223 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1224 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1228 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1229 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1230 well (unless it's reduction). */
1231 if (SLP_INSTANCE_LOADS (slp_instn
).length () != group_size
)
1233 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1234 if (!node
->load_permutation
.exists ())
1237 load_index
= sbitmap_alloc (group_size
);
1238 bitmap_clear (load_index
);
1239 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1241 unsigned int lidx
= node
->load_permutation
[0];
1242 if (bitmap_bit_p (load_index
, lidx
))
1244 sbitmap_free (load_index
);
1247 bitmap_set_bit (load_index
, lidx
);
1248 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, k
)
1251 sbitmap_free (load_index
);
1255 for (i
= 0; i
< group_size
; i
++)
1256 if (!bitmap_bit_p (load_index
, i
))
1258 sbitmap_free (load_index
);
1261 sbitmap_free (load_index
);
1263 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1264 if (node
->load_permutation
.exists ()
1265 && !vect_transform_slp_perm_load
1267 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true))
1273 /* Find the first load in the loop that belongs to INSTANCE.
1274 When loads are in several SLP nodes, there can be a case in which the first
1275 load does not appear in the first SLP node to be transformed, causing
1276 incorrect order of statements. Since we generate all the loads together,
1277 they must be inserted before the first load of the SLP instance and not
1278 before the first load of the first node of the instance. */
1281 vect_find_first_load_in_slp_instance (slp_instance instance
)
1285 gimple first_load
= NULL
, load
;
1287 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1288 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1289 first_load
= get_earlier_stmt (load
, first_load
);
1295 /* Find the last store in SLP INSTANCE. */
1298 vect_find_last_store_in_slp_instance (slp_instance instance
)
1302 gimple last_store
= NULL
, store
;
1304 node
= SLP_INSTANCE_TREE (instance
);
1305 for (i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &store
); i
++)
1306 last_store
= get_later_stmt (store
, last_store
);
1311 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1314 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1315 slp_instance instance
, slp_tree node
,
1316 stmt_vector_for_cost
*prologue_cost_vec
,
1317 unsigned ncopies_for_cost
)
1319 stmt_vector_for_cost
*body_cost_vec
= &SLP_INSTANCE_BODY_COST_VEC (instance
);
1324 stmt_vec_info stmt_info
;
1326 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1328 /* Recurse down the SLP tree. */
1329 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1330 vect_analyze_slp_cost_1 (loop_vinfo
, bb_vinfo
,
1331 instance
, child
, prologue_cost_vec
,
1334 /* Look at the first scalar stmt to determine the cost. */
1335 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1336 stmt_info
= vinfo_for_stmt (stmt
);
1337 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1339 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1340 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
1341 vect_uninitialized_def
,
1342 node
, prologue_cost_vec
, body_cost_vec
);
1346 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1347 vect_model_load_cost (stmt_info
, ncopies_for_cost
, false,
1348 node
, prologue_cost_vec
, body_cost_vec
);
1349 /* If the load is permuted record the cost for the permutation.
1350 ??? Loads from multiple chains are let through here only
1351 for a single special case involving complex numbers where
1352 in the end no permutation is necessary. */
1353 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, s
)
1354 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s
))
1355 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
))
1356 && vect_get_place_in_interleaving_chain
1357 (s
, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
)) != i
)
1359 record_stmt_cost (body_cost_vec
, group_size
, vec_perm
,
1360 stmt_info
, 0, vect_body
);
1366 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1367 stmt_info
, 0, vect_body
);
1369 /* Scan operands and account for prologue cost of constants/externals.
1370 ??? This over-estimates cost for multiple uses and should be
1372 lhs
= gimple_get_lhs (stmt
);
1373 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1375 tree def
, op
= gimple_op (stmt
, i
);
1377 enum vect_def_type dt
;
1378 if (!op
|| op
== lhs
)
1380 if (vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
,
1381 &def_stmt
, &def
, &dt
)
1382 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
1383 record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
1384 stmt_info
, 0, vect_prologue
);
1388 /* Compute the cost for the SLP instance INSTANCE. */
1391 vect_analyze_slp_cost (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1392 slp_instance instance
, unsigned nunits
)
1394 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1395 unsigned ncopies_for_cost
;
1396 stmt_info_for_cost
*si
;
1399 /* Calculate the number of vector stmts to create based on the unrolling
1400 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1401 GROUP_SIZE / NUNITS otherwise. */
1402 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1403 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1405 prologue_cost_vec
.create (10);
1406 body_cost_vec
.create (10);
1407 SLP_INSTANCE_BODY_COST_VEC (instance
) = body_cost_vec
;
1408 vect_analyze_slp_cost_1 (loop_vinfo
, bb_vinfo
,
1409 instance
, SLP_INSTANCE_TREE (instance
),
1410 &prologue_cost_vec
, ncopies_for_cost
);
1412 /* Record the prologue costs, which were delayed until we were
1413 sure that SLP was successful. Unlike the body costs, we know
1414 the final values now regardless of the loop vectorization factor. */
1415 void *data
= (loop_vinfo
? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
)
1416 : BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
1417 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1419 struct _stmt_vec_info
*stmt_info
1420 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1421 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1422 si
->misalign
, vect_prologue
);
1425 prologue_cost_vec
.release ();
1428 /* Analyze an SLP instance starting from a group of grouped stores. Call
1429 vect_build_slp_tree to build a tree of packed stmts if possible.
1430 Return FALSE if it's impossible to SLP any stmt in the loop. */
1433 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1436 slp_instance new_instance
;
1438 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1439 unsigned int unrolling_factor
= 1, nunits
;
1440 tree vectype
, scalar_type
= NULL_TREE
;
1442 unsigned int vectorization_factor
= 0;
1444 unsigned int max_nunits
= 0;
1445 vec
<slp_tree
> loads
;
1446 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1447 vec
<gimple
> scalar_stmts
;
1449 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1453 scalar_type
= TREE_TYPE (DR_REF (dr
));
1454 vectype
= get_vectype_for_scalar_type (scalar_type
);
1458 gcc_assert (loop_vinfo
);
1459 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1462 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1466 gcc_assert (loop_vinfo
);
1467 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1468 group_size
= LOOP_VINFO_REDUCTIONS (loop_vinfo
).length ();
1473 if (dump_enabled_p ())
1475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1476 "Build SLP failed: unsupported data-type ");
1477 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1478 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1484 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1486 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1488 vectorization_factor
= nunits
;
1490 /* Calculate the unrolling factor. */
1491 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1492 if (unrolling_factor
!= 1 && !loop_vinfo
)
1494 if (dump_enabled_p ())
1495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1496 "Build SLP failed: unrolling required in basic"
1502 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1503 scalar_stmts
.create (group_size
);
1505 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1507 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1510 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1511 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1512 scalar_stmts
.safe_push (
1513 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1515 scalar_stmts
.safe_push (next
);
1516 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1521 /* Collect reduction statements. */
1522 vec
<gimple
> reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1523 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1524 scalar_stmts
.safe_push (next
);
1527 node
= vect_create_new_slp_node (scalar_stmts
);
1529 loads
.create (group_size
);
1531 /* Build the tree for the SLP instance. */
1532 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1533 &max_nunits
, &loads
,
1534 vectorization_factor
, NULL
, NULL
))
1536 /* Calculate the unrolling factor based on the smallest type. */
1537 if (max_nunits
> nunits
)
1538 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1541 if (unrolling_factor
!= 1 && !loop_vinfo
)
1543 if (dump_enabled_p ())
1544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1545 "Build SLP failed: unrolling required in basic"
1547 vect_free_slp_tree (node
);
1552 /* Create a new SLP instance. */
1553 new_instance
= XNEW (struct _slp_instance
);
1554 SLP_INSTANCE_TREE (new_instance
) = node
;
1555 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1556 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1557 SLP_INSTANCE_BODY_COST_VEC (new_instance
) = vNULL
;
1558 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1559 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1561 /* Compute the load permutation. */
1563 bool loads_permuted
= false;
1564 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1566 vec
<unsigned> load_permutation
;
1568 gimple load
, first_stmt
;
1569 bool this_load_permuted
= false;
1570 load_permutation
.create (group_size
);
1571 first_stmt
= GROUP_FIRST_ELEMENT
1572 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1573 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1576 = vect_get_place_in_interleaving_chain (load
, first_stmt
);
1577 gcc_assert (load_place
!= -1);
1578 if (load_place
!= j
)
1579 this_load_permuted
= true;
1580 load_permutation
.safe_push (load_place
);
1582 if (!this_load_permuted
)
1584 load_permutation
.release ();
1587 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1588 loads_permuted
= true;
1593 if (!vect_supported_load_permutation_p (new_instance
))
1595 if (dump_enabled_p ())
1597 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1598 "Build SLP failed: unsupported load "
1600 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1601 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1603 vect_free_slp_instance (new_instance
);
1607 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1608 = vect_find_first_load_in_slp_instance (new_instance
);
1611 /* Compute the costs of this SLP instance. */
1612 vect_analyze_slp_cost (loop_vinfo
, bb_vinfo
,
1613 new_instance
, TYPE_VECTOR_SUBPARTS (vectype
));
1616 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
).safe_push (new_instance
);
1618 BB_VINFO_SLP_INSTANCES (bb_vinfo
).safe_push (new_instance
);
1620 if (dump_enabled_p ())
1621 vect_print_slp_tree (MSG_NOTE
, node
);
1626 /* Failed to SLP. */
1627 /* Free the allocated memory. */
1628 vect_free_slp_tree (node
);
1635 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1636 trees of packed scalar stmts if SLP is possible. */
1639 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1642 vec
<gimple
> grouped_stores
;
1643 vec
<gimple
> reductions
= vNULL
;
1644 vec
<gimple
> reduc_chains
= vNULL
;
1645 gimple first_element
;
1648 if (dump_enabled_p ())
1649 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
1653 grouped_stores
= LOOP_VINFO_GROUPED_STORES (loop_vinfo
);
1654 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1655 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1658 grouped_stores
= BB_VINFO_GROUPED_STORES (bb_vinfo
);
1660 /* Find SLP sequences starting from groups of grouped stores. */
1661 FOR_EACH_VEC_ELT (grouped_stores
, i
, first_element
)
1662 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1665 if (bb_vinfo
&& !ok
)
1667 if (dump_enabled_p ())
1668 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1669 "Failed to SLP the basic block.\n");
1675 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
).length () > 0)
1677 /* Find SLP sequences starting from reduction chains. */
1678 FOR_EACH_VEC_ELT (reduc_chains
, i
, first_element
)
1679 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1684 /* Don't try to vectorize SLP reductions if reduction chain was
1689 /* Find SLP sequences starting from groups of reductions. */
1690 if (loop_vinfo
&& LOOP_VINFO_REDUCTIONS (loop_vinfo
).length () > 1
1691 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, reductions
[0]))
1698 /* For each possible SLP instance decide whether to SLP it and calculate overall
1699 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1700 least one instance. */
1703 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1705 unsigned int i
, unrolling_factor
= 1;
1706 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1707 slp_instance instance
;
1708 int decided_to_slp
= 0;
1710 if (dump_enabled_p ())
1711 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
1714 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1716 /* FORNOW: SLP if you can. */
1717 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1718 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1720 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1721 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1722 loop-based vectorization. Such stmts will be marked as HYBRID. */
1723 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1727 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1729 if (decided_to_slp
&& dump_enabled_p ())
1730 dump_printf_loc (MSG_NOTE
, vect_location
,
1731 "Decided to SLP %d instances. Unrolling factor %d\n",
1732 decided_to_slp
, unrolling_factor
);
1734 return (decided_to_slp
> 0);
1738 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1739 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1742 vect_detect_hybrid_slp_stmts (slp_tree node
)
1745 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (node
);
1746 gimple stmt
= stmts
[0];
1747 imm_use_iterator imm_iter
;
1749 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1751 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1752 struct loop
*loop
= NULL
;
1753 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1754 basic_block bb
= NULL
;
1760 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1762 bb
= BB_VINFO_BB (bb_vinfo
);
1764 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1765 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1766 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1767 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1768 if (gimple_bb (use_stmt
)
1769 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1770 || bb
== gimple_bb (use_stmt
))
1771 && (stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1772 && !STMT_SLP_TYPE (stmt_vinfo
)
1773 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1774 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1775 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1776 && STMT_VINFO_DEF_TYPE (stmt_vinfo
)
1777 == vect_reduction_def
))
1778 vect_mark_slp_stmts (node
, hybrid
, i
);
1780 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1781 vect_detect_hybrid_slp_stmts (child
);
1785 /* Find stmts that must be both vectorized and SLPed. */
1788 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1791 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1792 slp_instance instance
;
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
1798 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1799 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1803 /* Create and initialize a new bb_vec_info struct for BB, as well as
1804 stmt_vec_info structs for all the stmts in it. */
1807 new_bb_vec_info (basic_block bb
)
1809 bb_vec_info res
= NULL
;
1810 gimple_stmt_iterator gsi
;
1812 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1813 BB_VINFO_BB (res
) = bb
;
1815 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1817 gimple stmt
= gsi_stmt (gsi
);
1818 gimple_set_uid (stmt
, 0);
1819 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1822 BB_VINFO_GROUPED_STORES (res
).create (10);
1823 BB_VINFO_SLP_INSTANCES (res
).create (2);
1824 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
1831 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1832 stmts in the basic block. */
1835 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1837 vec
<slp_instance
> slp_instances
;
1838 slp_instance instance
;
1840 gimple_stmt_iterator si
;
1846 bb
= BB_VINFO_BB (bb_vinfo
);
1848 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1850 gimple stmt
= gsi_stmt (si
);
1851 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1854 /* Free stmt_vec_info. */
1855 free_stmt_vec_info (stmt
);
1858 vect_destroy_datarefs (NULL
, bb_vinfo
);
1859 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1860 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
1861 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1862 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1863 vect_free_slp_instance (instance
);
1864 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
1865 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
1871 /* Analyze statements contained in SLP tree node after recursively analyzing
1872 the subtree. Return TRUE if the operations are supported. */
1875 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1885 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1886 if (!vect_slp_analyze_node_operations (bb_vinfo
, child
))
1889 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1891 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1892 gcc_assert (stmt_info
);
1893 gcc_assert (PURE_SLP_STMT (stmt_info
));
1895 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1903 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1904 operations are supported. */
1907 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1909 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1910 slp_instance instance
;
1913 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
1915 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1916 SLP_INSTANCE_TREE (instance
)))
1918 vect_free_slp_instance (instance
);
1919 slp_instances
.ordered_remove (i
);
1925 if (!slp_instances
.length ())
1932 /* Compute the scalar cost of the SLP node NODE and its children
1933 and return it. Do not account defs that are marked in LIFE and
1934 update LIFE according to uses of NODE. */
1937 vect_bb_slp_scalar_cost (basic_block bb
,
1938 slp_tree node
, vec
<bool, va_heap
> *life
)
1940 unsigned scalar_cost
= 0;
1945 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1948 ssa_op_iter op_iter
;
1949 def_operand_p def_p
;
1950 stmt_vec_info stmt_info
;
1955 /* If there is a non-vectorized use of the defs then the scalar
1956 stmt is kept live in which case we do not account it or any
1957 required defs in the SLP children in the scalar cost. This
1958 way we make the vectorization more costly when compared to
1960 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
1962 imm_use_iterator use_iter
;
1964 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
1965 if (gimple_code (use_stmt
) == GIMPLE_PHI
1966 || gimple_bb (use_stmt
) != bb
1967 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt
)))
1970 BREAK_FROM_IMM_USE_STMT (use_iter
);
1976 stmt_info
= vinfo_for_stmt (stmt
);
1977 if (STMT_VINFO_DATA_REF (stmt_info
))
1979 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1980 stmt_cost
= vect_get_stmt_cost (scalar_load
);
1982 stmt_cost
= vect_get_stmt_cost (scalar_store
);
1985 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
1987 scalar_cost
+= stmt_cost
;
1990 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1991 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, life
);
1996 /* Check if vectorization of the basic block is profitable. */
1999 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2001 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2002 slp_instance instance
;
2004 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2005 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2006 void *target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
2007 stmt_vec_info stmt_info
= NULL
;
2008 stmt_vector_for_cost body_cost_vec
;
2009 stmt_info_for_cost
*ci
;
2011 /* Calculate vector costs. */
2012 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2014 body_cost_vec
= SLP_INSTANCE_BODY_COST_VEC (instance
);
2016 FOR_EACH_VEC_ELT (body_cost_vec
, j
, ci
)
2018 stmt_info
= ci
->stmt
? vinfo_for_stmt (ci
->stmt
) : NULL
;
2019 (void) add_stmt_cost (target_cost_data
, ci
->count
, ci
->kind
,
2020 stmt_info
, ci
->misalign
, vect_body
);
2024 /* Calculate scalar cost. */
2025 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2027 stack_vec
<bool, 20> life
;
2028 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2029 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2030 SLP_INSTANCE_TREE (instance
),
2034 /* Complete the target-specific cost calculation. */
2035 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2036 &vec_inside_cost
, &vec_epilogue_cost
);
2038 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2040 if (dump_enabled_p ())
2042 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2043 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2045 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2046 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2047 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2050 /* Vectorization is profitable if its cost is less than the cost of scalar
2052 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
2058 /* Check if the basic block can be vectorized. */
2061 vect_slp_analyze_bb_1 (basic_block bb
)
2063 bb_vec_info bb_vinfo
;
2064 vec
<slp_instance
> slp_instances
;
2065 slp_instance instance
;
2069 bb_vinfo
= new_bb_vec_info (bb
);
2073 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2077 "not vectorized: unhandled data-ref in basic "
2080 destroy_bb_vec_info (bb_vinfo
);
2084 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2086 if (dump_enabled_p ())
2087 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2088 "not vectorized: not enough data-refs in "
2091 destroy_bb_vec_info (bb_vinfo
);
2095 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
2097 if (dump_enabled_p ())
2098 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2099 "not vectorized: unhandled data access in "
2102 destroy_bb_vec_info (bb_vinfo
);
2106 vect_pattern_recog (NULL
, bb_vinfo
);
2108 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo
))
2110 if (dump_enabled_p ())
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2112 "not vectorized: unhandled data dependence "
2113 "in basic block.\n");
2115 destroy_bb_vec_info (bb_vinfo
);
2119 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2123 "not vectorized: bad data alignment in basic "
2126 destroy_bb_vec_info (bb_vinfo
);
2130 /* Check the SLP opportunities in the basic block, analyze and build SLP
2132 if (!vect_analyze_slp (NULL
, bb_vinfo
))
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2136 "not vectorized: failed to find SLP opportunities "
2137 "in basic block.\n");
2139 destroy_bb_vec_info (bb_vinfo
);
2143 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2145 /* Mark all the statements that we want to vectorize as pure SLP and
2147 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2149 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2150 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2153 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2157 "not vectorized: unsupported alignment in basic "
2159 destroy_bb_vec_info (bb_vinfo
);
2163 if (!vect_slp_analyze_operations (bb_vinfo
))
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2167 "not vectorized: bad operation in basic block.\n");
2169 destroy_bb_vec_info (bb_vinfo
);
2173 /* Cost model: check if the vectorization is worthwhile. */
2174 if (!unlimited_cost_model ()
2175 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2177 if (dump_enabled_p ())
2178 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2179 "not vectorized: vectorization is not "
2182 destroy_bb_vec_info (bb_vinfo
);
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_NOTE
, vect_location
,
2188 "Basic block will be vectorized using SLP\n");
2195 vect_slp_analyze_bb (basic_block bb
)
2197 bb_vec_info bb_vinfo
;
2199 gimple_stmt_iterator gsi
;
2200 unsigned int vector_sizes
;
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2205 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2207 gimple stmt
= gsi_stmt (gsi
);
2208 if (!is_gimple_debug (stmt
)
2209 && !gimple_nop_p (stmt
)
2210 && gimple_code (stmt
) != GIMPLE_LABEL
)
2214 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2218 "not vectorized: too many instructions in "
2224 /* Autodetect first vector size we try. */
2225 current_vector_size
= 0;
2226 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2230 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2234 destroy_bb_vec_info (bb_vinfo
);
2236 vector_sizes
&= ~current_vector_size
;
2237 if (vector_sizes
== 0
2238 || current_vector_size
== 0)
2241 /* Try the next biggest vector size. */
2242 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2243 if (dump_enabled_p ())
2244 dump_printf_loc (MSG_NOTE
, vect_location
,
2245 "***** Re-trying analysis with "
2246 "vector size %d\n", current_vector_size
);
2251 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2252 the number of created vector stmts depends on the unrolling factor).
2253 However, the actual number of vector stmts for every SLP node depends on
2254 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2255 should be updated. In this function we assume that the inside costs
2256 calculated in vect_model_xxx_cost are linear in ncopies. */
2259 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2261 unsigned int i
, j
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2262 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2263 slp_instance instance
;
2264 stmt_vector_for_cost body_cost_vec
;
2265 stmt_info_for_cost
*si
;
2266 void *data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE
, vect_location
,
2270 "=== vect_update_slp_costs_according_to_vf ===\n");
2272 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2274 /* We assume that costs are linear in ncopies. */
2275 int ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2277 /* Record the instance's instructions in the target cost model.
2278 This was delayed until here because the count of instructions
2279 isn't known beforehand. */
2280 body_cost_vec
= SLP_INSTANCE_BODY_COST_VEC (instance
);
2282 FOR_EACH_VEC_ELT (body_cost_vec
, j
, si
)
2283 (void) add_stmt_cost (data
, si
->count
* ncopies
, si
->kind
,
2284 vinfo_for_stmt (si
->stmt
), si
->misalign
,
2290 /* For constant and loop invariant defs of SLP_NODE this function returns
2291 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2292 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2293 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2294 REDUC_INDEX is the index of the reduction operand in the statements, unless
2298 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2299 vec
<tree
> *vec_oprnds
,
2300 unsigned int op_num
, unsigned int number_of_vectors
,
2303 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2304 gimple stmt
= stmts
[0];
2305 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2309 unsigned j
, number_of_places_left_in_vector
;
2312 int group_size
= stmts
.length ();
2313 unsigned int vec_num
, i
;
2314 unsigned number_of_copies
= 1;
2316 voprnds
.create (number_of_vectors
);
2317 bool constant_p
, is_store
;
2318 tree neutral_op
= NULL
;
2319 enum tree_code code
= gimple_expr_code (stmt
);
2322 gimple_seq ctor_seq
= NULL
;
2324 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2325 && reduc_index
!= -1)
2327 op_num
= reduc_index
- 1;
2328 op
= gimple_op (stmt
, reduc_index
);
2329 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2330 we need either neutral operands or the original operands. See
2331 get_initial_def_for_reduction() for details. */
2334 case WIDEN_SUM_EXPR
:
2340 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2341 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2343 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2348 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2349 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2351 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2356 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2361 def_stmt
= SSA_NAME_DEF_STMT (op
);
2362 loop
= (gimple_bb (stmt
))->loop_father
;
2363 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2364 loop_preheader_edge (loop
));
2372 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2375 op
= gimple_assign_rhs1 (stmt
);
2382 if (CONSTANT_CLASS_P (op
))
2387 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2388 gcc_assert (vector_type
);
2389 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2391 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2392 created vectors. It is greater than 1 if unrolling is performed.
2394 For example, we have two scalar operands, s1 and s2 (e.g., group of
2395 strided accesses of size two), while NUNITS is four (i.e., four scalars
2396 of this type can be packed in a vector). The output vector will contain
2397 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2400 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2401 containing the operands.
2403 For example, NUNITS is four as before, and the group size is 8
2404 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2405 {s5, s6, s7, s8}. */
2407 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2409 number_of_places_left_in_vector
= nunits
;
2410 elts
= XALLOCAVEC (tree
, nunits
);
2411 for (j
= 0; j
< number_of_copies
; j
++)
2413 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
2416 op
= gimple_assign_rhs1 (stmt
);
2422 if (op_num
== 0 || op_num
== 1)
2424 tree cond
= gimple_assign_rhs1 (stmt
);
2425 op
= TREE_OPERAND (cond
, op_num
);
2430 op
= gimple_assign_rhs2 (stmt
);
2432 op
= gimple_assign_rhs3 (stmt
);
2437 op
= gimple_call_arg (stmt
, op_num
);
2444 op
= gimple_op (stmt
, op_num
+ 1);
2445 /* Unlike the other binary operators, shifts/rotates have
2446 the shift count being int, instead of the same type as
2447 the lhs, so make sure the scalar is the right type if
2448 we are dealing with vectors of
2449 long long/long/short/char. */
2450 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
2451 op
= fold_convert (TREE_TYPE (vector_type
), op
);
2455 op
= gimple_op (stmt
, op_num
+ 1);
2460 if (reduc_index
!= -1)
2462 loop
= (gimple_bb (stmt
))->loop_father
;
2463 def_stmt
= SSA_NAME_DEF_STMT (op
);
2467 /* Get the def before the loop. In reduction chain we have only
2468 one initial value. */
2469 if ((j
!= (number_of_copies
- 1)
2470 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2475 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2476 loop_preheader_edge (loop
));
2479 /* Create 'vect_ = {op0,op1,...,opn}'. */
2480 number_of_places_left_in_vector
--;
2481 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
2483 if (CONSTANT_CLASS_P (op
))
2485 op
= fold_unary (VIEW_CONVERT_EXPR
,
2486 TREE_TYPE (vector_type
), op
);
2487 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
2492 = make_ssa_name (TREE_TYPE (vector_type
), NULL
);
2494 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
2497 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
,
2498 new_temp
, op
, NULL_TREE
);
2499 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
2503 elts
[number_of_places_left_in_vector
] = op
;
2504 if (!CONSTANT_CLASS_P (op
))
2507 if (number_of_places_left_in_vector
== 0)
2509 number_of_places_left_in_vector
= nunits
;
2512 vec_cst
= build_vector (vector_type
, elts
);
2515 vec
<constructor_elt
, va_gc
> *v
;
2517 vec_alloc (v
, nunits
);
2518 for (k
= 0; k
< nunits
; ++k
)
2519 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
2520 vec_cst
= build_constructor (vector_type
, v
);
2522 voprnds
.quick_push (vect_init_vector (stmt
, vec_cst
,
2523 vector_type
, NULL
));
2524 if (ctor_seq
!= NULL
)
2526 gimple init_stmt
= SSA_NAME_DEF_STMT (voprnds
.last ());
2527 gimple_stmt_iterator gsi
= gsi_for_stmt (init_stmt
);
2528 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
2536 /* Since the vectors are created in the reverse order, we should invert
2538 vec_num
= voprnds
.length ();
2539 for (j
= vec_num
; j
!= 0; j
--)
2541 vop
= voprnds
[j
- 1];
2542 vec_oprnds
->quick_push (vop
);
2547 /* In case that VF is greater than the unrolling factor needed for the SLP
2548 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2549 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2550 to replicate the vectors. */
2551 while (number_of_vectors
> vec_oprnds
->length ())
2553 tree neutral_vec
= NULL
;
2558 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2560 vec_oprnds
->quick_push (neutral_vec
);
2564 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
2565 vec_oprnds
->quick_push (vop
);
2571 /* Get vectorized definitions from SLP_NODE that contains corresponding
2572 vectorized def-stmts. */
2575 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
2578 gimple vec_def_stmt
;
2581 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
2583 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2585 gcc_assert (vec_def_stmt
);
2586 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2587 vec_oprnds
->quick_push (vec_oprnd
);
2592 /* Get vectorized definitions for SLP_NODE.
2593 If the scalar definitions are loop invariants or constants, collect them and
2594 call vect_get_constant_vectors() to create vector stmts.
2595 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2596 must be stored in the corresponding child of SLP_NODE, and we call
2597 vect_get_slp_vect_defs () to retrieve them. */
2600 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
2601 vec
<vec
<tree
> > *vec_oprnds
, int reduc_index
)
2604 int number_of_vects
= 0, i
;
2605 unsigned int child_index
= 0;
2606 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2607 slp_tree child
= NULL
;
2610 bool vectorized_defs
;
2612 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
2613 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
2615 /* For each operand we check if it has vectorized definitions in a child
2616 node or we need to create them (for invariants and constants). We
2617 check if the LHS of the first stmt of the next child matches OPRND.
2618 If it does, we found the correct child. Otherwise, we call
2619 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2620 to check this child node for the next operand. */
2621 vectorized_defs
= false;
2622 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
2624 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
2626 /* We have to check both pattern and original def, if available. */
2627 gimple first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
2628 gimple related
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2630 if (operand_equal_p (oprnd
, gimple_get_lhs (first_def
), 0)
2632 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
2634 /* The number of vector defs is determined by the number of
2635 vector statements in the node from which we get those
2637 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2638 vectorized_defs
= true;
2643 if (!vectorized_defs
)
2647 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2648 /* Number of vector stmts was calculated according to LHS in
2649 vect_schedule_slp_instance (), fix it by replacing LHS with
2650 RHS, if necessary. See vect_get_smallest_scalar_type () for
2652 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2654 if (rhs_size_unit
!= lhs_size_unit
)
2656 number_of_vects
*= rhs_size_unit
;
2657 number_of_vects
/= lhs_size_unit
;
2662 /* Allocate memory for vectorized defs. */
2664 vec_defs
.create (number_of_vects
);
2666 /* For reduction defs we call vect_get_constant_vectors (), since we are
2667 looking for initial loop invariant values. */
2668 if (vectorized_defs
&& reduc_index
== -1)
2669 /* The defs are already vectorized. */
2670 vect_get_slp_vect_defs (child
, &vec_defs
);
2672 /* Build vectors from scalar defs. */
2673 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2674 number_of_vects
, reduc_index
);
2676 vec_oprnds
->quick_push (vec_defs
);
2678 /* For reductions, we only need initial values. */
2679 if (reduc_index
!= -1)
2685 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2686 building a vector of type MASK_TYPE from it) and two input vectors placed in
2687 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2688 shifting by STRIDE elements of DR_CHAIN for every copy.
2689 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2691 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2692 the created stmts must be inserted. */
2695 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2696 tree mask
, int first_vec_indx
, int second_vec_indx
,
2697 gimple_stmt_iterator
*gsi
, slp_tree node
,
2698 tree vectype
, vec
<tree
> dr_chain
,
2699 int ncopies
, int vect_stmts_counter
)
2702 gimple perm_stmt
= NULL
;
2703 stmt_vec_info next_stmt_info
;
2705 tree first_vec
, second_vec
, data_ref
;
2707 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2709 /* Initialize the vect stmts of NODE to properly insert the generated
2711 for (i
= SLP_TREE_VEC_STMTS (node
).length ();
2712 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2713 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
2715 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2716 for (i
= 0; i
< ncopies
; i
++)
2718 first_vec
= dr_chain
[first_vec_indx
];
2719 second_vec
= dr_chain
[second_vec_indx
];
2721 /* Generate the permute statement. */
2722 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, perm_dest
,
2723 first_vec
, second_vec
, mask
);
2724 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2725 gimple_set_lhs (perm_stmt
, data_ref
);
2726 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2728 /* Store the vector statement in NODE. */
2729 SLP_TREE_VEC_STMTS (node
)[stride
* i
+ vect_stmts_counter
] = perm_stmt
;
2731 first_vec_indx
+= stride
;
2732 second_vec_indx
+= stride
;
2735 /* Mark the scalar stmt as vectorized. */
2736 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2737 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2741 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2742 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2743 representation. Check that the mask is valid and return FALSE if not.
2744 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2745 the next vector, i.e., the current first vector is not needed. */
2748 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2749 int mask_nunits
, bool only_one_vec
, int index
,
2750 unsigned char *mask
, int *current_mask_element
,
2751 bool *need_next_vector
, int *number_of_mask_fixes
,
2752 bool *mask_fixed
, bool *needs_first_vector
)
2756 /* Convert to target specific representation. */
2757 *current_mask_element
= first_mask_element
+ m
;
2758 /* Adjust the value in case it's a mask for second and third vectors. */
2759 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2761 if (*current_mask_element
< mask_nunits
)
2762 *needs_first_vector
= true;
2764 /* We have only one input vector to permute but the mask accesses values in
2765 the next vector as well. */
2766 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2768 if (dump_enabled_p ())
2770 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2771 "permutation requires at least two vectors ");
2772 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2773 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2779 /* The mask requires the next vector. */
2780 if (*current_mask_element
>= mask_nunits
* 2)
2782 if (*needs_first_vector
|| *mask_fixed
)
2784 /* We either need the first vector too or have already moved to the
2785 next vector. In both cases, this permutation needs three
2787 if (dump_enabled_p ())
2789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2790 "permutation requires at "
2791 "least three vectors ");
2792 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2793 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2799 /* We move to the next vector, dropping the first one and working with
2800 the second and the third - we need to adjust the values of the mask
2802 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2804 for (i
= 0; i
< index
; i
++)
2805 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2807 (*number_of_mask_fixes
)++;
2811 *need_next_vector
= *mask_fixed
;
2813 /* This was the last element of this mask. Start a new one. */
2814 if (index
== mask_nunits
- 1)
2816 *number_of_mask_fixes
= 1;
2817 *mask_fixed
= false;
2818 *needs_first_vector
= false;
2825 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2826 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2827 permute statements for the SLP node NODE of the SLP instance
2828 SLP_NODE_INSTANCE. */
2831 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
2832 gimple_stmt_iterator
*gsi
, int vf
,
2833 slp_instance slp_node_instance
, bool analyze_only
)
2835 gimple stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2836 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2837 tree mask_element_type
= NULL_TREE
, mask_type
;
2838 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2839 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2840 gimple next_scalar_stmt
;
2841 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2842 int first_mask_element
;
2843 int index
, unroll_factor
, current_mask_element
, ncopies
;
2844 unsigned char *mask
;
2845 bool only_one_vec
= false, need_next_vector
= false;
2846 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2847 int number_of_mask_fixes
= 1;
2848 bool mask_fixed
= false;
2849 bool needs_first_vector
= false;
2850 enum machine_mode mode
;
2852 mode
= TYPE_MODE (vectype
);
2854 if (!can_vec_perm_p (mode
, false, NULL
))
2856 if (dump_enabled_p ())
2858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2859 "no vect permute for ");
2860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2861 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2866 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2867 same size as the vector element being permuted. */
2868 mask_element_type
= lang_hooks
.types
.type_for_mode
2869 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
2870 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2871 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2872 mask
= XALLOCAVEC (unsigned char, nunits
);
2873 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2875 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2876 unrolling factor. */
2877 orig_vec_stmts_num
= group_size
*
2878 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2879 if (orig_vec_stmts_num
== 1)
2880 only_one_vec
= true;
2882 /* Number of copies is determined by the final vectorization factor
2883 relatively to SLP_NODE_INSTANCE unrolling factor. */
2884 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2886 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2889 /* Generate permutation masks for every NODE. Number of masks for each NODE
2890 is equal to GROUP_SIZE.
2891 E.g., we have a group of three nodes with three loads from the same
2892 location in each node, and the vector size is 4. I.e., we have a
2893 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2894 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2895 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2898 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2899 The last mask is illegal since we assume two operands for permute
2900 operation, and the mask element values can't be outside that range.
2901 Hence, the last mask must be converted into {2,5,5,5}.
2902 For the first two permutations we need the first and the second input
2903 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2904 we need the second and the third vectors: {b1,c1,a2,b2} and
2910 vect_stmts_counter
= 0;
2912 first_vec_index
= vec_index
++;
2914 second_vec_index
= first_vec_index
;
2916 second_vec_index
= vec_index
++;
2918 for (j
= 0; j
< unroll_factor
; j
++)
2920 for (k
= 0; k
< group_size
; k
++)
2922 i
= SLP_TREE_LOAD_PERMUTATION (node
)[k
];
2923 first_mask_element
= i
+ j
* group_size
;
2924 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2925 nunits
, only_one_vec
, index
,
2926 mask
, ¤t_mask_element
,
2928 &number_of_mask_fixes
, &mask_fixed
,
2929 &needs_first_vector
))
2931 mask
[index
++] = current_mask_element
;
2933 if (index
== nunits
)
2936 if (!can_vec_perm_p (mode
, false, mask
))
2938 if (dump_enabled_p ())
2940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
2942 "unsupported vect permute { ");
2943 for (i
= 0; i
< nunits
; ++i
)
2944 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ",
2946 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
2954 tree mask_vec
, *mask_elts
;
2955 mask_elts
= XALLOCAVEC (tree
, nunits
);
2956 for (l
= 0; l
< nunits
; ++l
)
2957 mask_elts
[l
] = build_int_cst (mask_element_type
,
2959 mask_vec
= build_vector (mask_type
, mask_elts
);
2961 if (need_next_vector
)
2963 first_vec_index
= second_vec_index
;
2964 second_vec_index
= vec_index
;
2968 = SLP_TREE_SCALAR_STMTS (node
)[scalar_index
++];
2970 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2971 mask_vec
, first_vec_index
, second_vec_index
,
2972 gsi
, node
, vectype
, dr_chain
,
2973 ncopies
, vect_stmts_counter
++);
2985 /* Vectorize SLP instance tree in postorder. */
2988 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2989 unsigned int vectorization_factor
)
2992 bool grouped_store
, is_store
;
2993 gimple_stmt_iterator si
;
2994 stmt_vec_info stmt_info
;
2995 unsigned int vec_stmts_size
, nunits
, group_size
;
3003 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3004 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3006 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3007 stmt_info
= vinfo_for_stmt (stmt
);
3009 /* VECTYPE is the type of the destination. */
3010 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3011 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3012 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3014 /* For each SLP instance calculate number of vector stmts to be created
3015 for the scalar stmts in each node of the SLP tree. Number of vector
3016 elements in one vector iteration is the number of scalar elements in
3017 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3019 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
3021 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3023 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3024 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3027 if (dump_enabled_p ())
3029 dump_printf_loc (MSG_NOTE
,vect_location
,
3030 "------>vectorizing SLP node starting from: ");
3031 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3032 dump_printf (MSG_NOTE
, "\n");
3035 /* Loads should be inserted before the first load. */
3036 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
3037 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3038 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
3039 && SLP_TREE_LOAD_PERMUTATION (node
).exists ())
3040 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
3041 else if (is_pattern_stmt_p (stmt_info
))
3042 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
3044 si
= gsi_for_stmt (stmt
);
3046 /* Stores should be inserted just before the last store. */
3047 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3048 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
3050 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
3051 if (is_pattern_stmt_p (vinfo_for_stmt (last_store
)))
3052 last_store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store
));
3053 si
= gsi_for_stmt (last_store
);
3056 /* Mark the first element of the reduction chain as reduction to properly
3057 transform the node. In the analysis phase only the last element of the
3058 chain is marked as reduction. */
3059 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3060 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3062 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3063 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3066 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3070 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3071 For loop vectorization this is done in vectorizable_call, but for SLP
3072 it needs to be deferred until end of vect_schedule_slp, because multiple
3073 SLP instances may refer to the same scalar stmt. */
3076 vect_remove_slp_scalar_calls (slp_tree node
)
3078 gimple stmt
, new_stmt
;
3079 gimple_stmt_iterator gsi
;
3083 stmt_vec_info stmt_info
;
3088 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3089 vect_remove_slp_scalar_calls (child
);
3091 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3093 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3095 stmt_info
= vinfo_for_stmt (stmt
);
3096 if (stmt_info
== NULL
3097 || is_pattern_stmt_p (stmt_info
)
3098 || !PURE_SLP_STMT (stmt_info
))
3100 lhs
= gimple_call_lhs (stmt
);
3101 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3102 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3103 set_vinfo_for_stmt (stmt
, NULL
);
3104 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3105 gsi
= gsi_for_stmt (stmt
);
3106 gsi_replace (&gsi
, new_stmt
, false);
3107 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3111 /* Generate vector code for all SLP instances in the loop/basic block. */
3114 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3116 vec
<slp_instance
> slp_instances
;
3117 slp_instance instance
;
3119 bool is_store
= false;
3123 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3124 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3128 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
3132 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3134 /* Schedule the tree of INSTANCE. */
3135 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3137 if (dump_enabled_p ())
3138 dump_printf_loc (MSG_NOTE
, vect_location
,
3139 "vectorizing stmts using SLP.\n");
3142 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3144 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3147 gimple_stmt_iterator gsi
;
3149 /* Remove scalar call stmts. Do not do this for basic-block
3150 vectorization as not all uses may be vectorized.
3151 ??? Why should this be necessary? DCE should be able to
3152 remove the stmts itself.
3153 ??? For BB vectorization we can as well remove scalar
3154 stmts starting from the SLP tree root if they have no
3157 vect_remove_slp_scalar_calls (root
);
3159 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3160 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3162 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3165 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3166 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3167 /* Free the attached stmt_vec_info and remove the stmt. */
3168 gsi
= gsi_for_stmt (store
);
3169 unlink_stmt_vdef (store
);
3170 gsi_remove (&gsi
, true);
3171 release_defs (store
);
3172 free_stmt_vec_info (store
);
3180 /* Vectorize the basic block. */
3183 vect_slp_transform_bb (basic_block bb
)
3185 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3186 gimple_stmt_iterator si
;
3188 gcc_assert (bb_vinfo
);
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB\n");
3193 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3195 gimple stmt
= gsi_stmt (si
);
3196 stmt_vec_info stmt_info
;
3198 if (dump_enabled_p ())
3200 dump_printf_loc (MSG_NOTE
, vect_location
,
3201 "------>SLPing statement: ");
3202 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3203 dump_printf (MSG_NOTE
, "\n");
3206 stmt_info
= vinfo_for_stmt (stmt
);
3207 gcc_assert (stmt_info
);
3209 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3210 if (STMT_SLP_TYPE (stmt_info
))
3212 vect_schedule_slp (NULL
, bb_vinfo
);
3217 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_NOTE
, vect_location
,
3219 "BASIC BLOCK VECTORIZED\n");
3221 destroy_bb_vec_info (bb_vinfo
);