/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
47 find_bb_location (basic_block bb
)
50 gimple_stmt_iterator si
;
55 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
58 if (gimple_location (stmt
) != UNKNOWN_LOC
)
59 return gimple_location (stmt
);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
69 vect_free_slp_tree (slp_tree node
)
77 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
78 vect_free_slp_tree ((slp_tree
)child
);
80 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
82 if (SLP_TREE_VEC_STMTS (node
))
83 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
89 /* Free the memory allocated for the SLP instance. */
92 vect_free_slp_instance (slp_instance instance
)
94 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
95 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
96 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
100 /* Create an SLP node for SCALAR_STMTS. */
103 vect_create_new_slp_node (VEC (gimple
, heap
) *scalar_stmts
)
105 slp_tree node
= XNEW (struct _slp_tree
);
106 gimple stmt
= VEC_index (gimple
, scalar_stmts
, 0);
109 if (is_gimple_call (stmt
))
110 nops
= gimple_call_num_args (stmt
);
111 else if (is_gimple_assign (stmt
))
113 nops
= gimple_num_ops (stmt
) - 1;
114 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
120 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
121 SLP_TREE_VEC_STMTS (node
) = NULL
;
122 SLP_TREE_CHILDREN (node
) = VEC_alloc (slp_void_p
, heap
, nops
);
123 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
124 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
130 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
132 static VEC (slp_oprnd_info
, heap
) *
133 vect_create_oprnd_info (int nops
, int group_size
)
136 slp_oprnd_info oprnd_info
;
137 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
139 oprnds_info
= VEC_alloc (slp_oprnd_info
, heap
, nops
);
140 for (i
= 0; i
< nops
; i
++)
142 oprnd_info
= XNEW (struct _slp_oprnd_info
);
143 oprnd_info
->def_stmts
= VEC_alloc (gimple
, heap
, group_size
);
144 oprnd_info
->first_dt
= vect_uninitialized_def
;
145 oprnd_info
->first_def_type
= NULL_TREE
;
146 oprnd_info
->first_const_oprnd
= NULL_TREE
;
147 oprnd_info
->first_pattern
= false;
148 VEC_quick_push (slp_oprnd_info
, oprnds_info
, oprnd_info
);
155 /* Free operands info. Free def-stmts in FREE_DEF_STMTS is true.
156 (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
157 succeds. In the later case we don't need the operands info that we used to
158 check isomorphism of the stmts, but we still need the def-stmts - they are
159 used as scalar stmts in SLP nodes. */
161 vect_free_oprnd_info (VEC (slp_oprnd_info
, heap
) **oprnds_info
,
165 slp_oprnd_info oprnd_info
;
168 FOR_EACH_VEC_ELT (slp_oprnd_info
, *oprnds_info
, i
, oprnd_info
)
169 VEC_free (gimple
, heap
, oprnd_info
->def_stmts
);
171 VEC_free (slp_oprnd_info
, heap
, *oprnds_info
);
175 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176 they are of a valid type and that they match the defs of the first stmt of
177 the SLP group (stored in OPRNDS_INFO). */
180 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
181 slp_tree slp_node
, gimple stmt
,
182 int ncopies_for_cost
, bool first
,
183 VEC (slp_oprnd_info
, heap
) **oprnds_info
)
186 unsigned int i
, number_of_oprnds
;
187 tree def
, def_op0
= NULL_TREE
;
189 enum vect_def_type dt
= vect_uninitialized_def
;
190 enum vect_def_type dt_op0
= vect_uninitialized_def
;
191 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
192 tree lhs
= gimple_get_lhs (stmt
);
193 struct loop
*loop
= NULL
;
194 enum tree_code rhs_code
;
195 bool different_types
= false;
196 bool pattern
= false;
197 slp_oprnd_info oprnd_info
, oprnd0_info
, oprnd1_info
;
199 tree compare_rhs
= NULL_TREE
;
202 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
204 if (is_gimple_call (stmt
))
205 number_of_oprnds
= gimple_call_num_args (stmt
);
206 else if (is_gimple_assign (stmt
))
208 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
209 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
215 for (i
= 0; i
< number_of_oprnds
; i
++)
220 compare_rhs
= NULL_TREE
;
223 oprnd
= gimple_op (stmt
, op_idx
++);
225 oprnd_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, i
);
227 if (COMPARISON_CLASS_P (oprnd
))
229 compare_rhs
= TREE_OPERAND (oprnd
, 1);
230 oprnd
= TREE_OPERAND (oprnd
, 0);
233 if (!vect_is_simple_use (oprnd
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
235 || (!def_stmt
&& dt
!= vect_constant_def
))
237 if (vect_print_dump_info (REPORT_SLP
))
239 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
240 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
246 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
247 from the pattern. Check that all the stmts of the node are in the
249 if (loop
&& def_stmt
&& gimple_bb (def_stmt
)
250 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
251 && vinfo_for_stmt (def_stmt
)
252 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
253 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
254 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
257 if (!first
&& !oprnd_info
->first_pattern
)
259 if (vect_print_dump_info (REPORT_DETAILS
))
261 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
262 " are in a pattern, and others are not ");
263 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
269 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
270 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
272 if (dt
== vect_unknown_def_type
)
274 if (vect_print_dump_info (REPORT_DETAILS
))
275 fprintf (vect_dump
, "Unsupported pattern.");
279 switch (gimple_code (def_stmt
))
282 def
= gimple_phi_result (def_stmt
);
286 def
= gimple_assign_lhs (def_stmt
);
290 if (vect_print_dump_info (REPORT_DETAILS
))
291 fprintf (vect_dump
, "unsupported defining stmt: ");
298 oprnd_info
->first_dt
= dt
;
299 oprnd_info
->first_pattern
= pattern
;
302 oprnd_info
->first_def_type
= TREE_TYPE (def
);
303 oprnd_info
->first_const_oprnd
= NULL_TREE
;
307 oprnd_info
->first_def_type
= NULL_TREE
;
308 oprnd_info
->first_const_oprnd
= oprnd
;
315 /* Analyze costs (for the first stmt of the group only). */
316 if (REFERENCE_CLASS_P (lhs
))
318 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
321 /* Not memory operation (we don't call this function for
323 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, &dt
,
329 /* Not first stmt of the group, check that the def-stmt/s match
330 the def-stmt/s of the first stmt. Allow different definition
331 types for reduction chains: the first stmt must be a
332 vect_reduction_def (a phi node), and the rest
333 vect_internal_def. */
334 if (((oprnd_info
->first_dt
!= dt
335 && !(oprnd_info
->first_dt
== vect_reduction_def
336 && dt
== vect_internal_def
))
337 || (oprnd_info
->first_def_type
!= NULL_TREE
339 && !types_compatible_p (oprnd_info
->first_def_type
,
342 && !types_compatible_p (TREE_TYPE (oprnd_info
->first_const_oprnd
),
346 if (number_of_oprnds
!= 2)
348 if (vect_print_dump_info (REPORT_SLP
))
349 fprintf (vect_dump
, "Build SLP failed: different types ");
354 /* Try to swap operands in case of binary operation. */
356 different_types
= true;
359 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
360 if (is_gimple_assign (stmt
)
361 && (rhs_code
= gimple_assign_rhs_code (stmt
))
362 && TREE_CODE_CLASS (rhs_code
) == tcc_binary
363 && commutative_tree_code (rhs_code
)
364 && oprnd0_info
->first_dt
== dt
365 && oprnd_info
->first_dt
== dt_op0
367 && !(oprnd0_info
->first_def_type
368 && !types_compatible_p (oprnd0_info
->first_def_type
,
370 && !(oprnd_info
->first_def_type
371 && !types_compatible_p (oprnd_info
->first_def_type
,
372 TREE_TYPE (def_op0
))))
374 if (vect_print_dump_info (REPORT_SLP
))
376 fprintf (vect_dump
, "Swapping operands of ");
377 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
380 swap_tree_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
381 gimple_assign_rhs2_ptr (stmt
));
385 if (vect_print_dump_info (REPORT_SLP
))
386 fprintf (vect_dump
, "Build SLP failed: different types ");
394 /* Check the types of the definitions. */
397 case vect_constant_def
:
398 case vect_external_def
:
399 case vect_reduction_def
:
402 case vect_internal_def
:
405 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
406 oprnd1_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
408 VEC_quick_push (gimple
, oprnd1_info
->def_stmts
, def_stmt
);
410 VEC_quick_push (gimple
, oprnd0_info
->def_stmts
, def_stmt
);
413 VEC_quick_push (gimple
, oprnd_info
->def_stmts
, def_stmt
);
418 /* FORNOW: Not supported. */
419 if (vect_print_dump_info (REPORT_SLP
))
421 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
422 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
433 /* Recursively build an SLP tree starting from NODE.
434 Fail (and return FALSE) if def-stmts are not isomorphic, require data
435 permutation or are of unsupported types of operation. Otherwise, return
439 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
440 slp_tree
*node
, unsigned int group_size
,
441 int *inside_cost
, int *outside_cost
,
442 int ncopies_for_cost
, unsigned int *max_nunits
,
443 VEC (int, heap
) **load_permutation
,
444 VEC (slp_tree
, heap
) **loads
,
445 unsigned int vectorization_factor
, bool *loads_permuted
)
448 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
449 gimple stmt
= VEC_index (gimple
, stmts
, 0);
450 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
451 enum tree_code first_cond_code
= ERROR_MARK
;
453 bool stop_recursion
= false, need_same_oprnds
= false;
454 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
455 unsigned int ncopies
;
458 enum machine_mode optab_op2_mode
;
459 enum machine_mode vec_mode
;
460 struct data_reference
*first_dr
;
462 bool permutation
= false;
463 unsigned int load_place
;
464 gimple first_load
, prev_first_load
= NULL
;
465 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
467 slp_oprnd_info oprnd_info
;
470 if (is_gimple_call (stmt
))
471 nops
= gimple_call_num_args (stmt
);
472 else if (is_gimple_assign (stmt
))
474 nops
= gimple_num_ops (stmt
) - 1;
475 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
481 oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
483 /* For every stmt in NODE find its def stmt/s. */
484 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
486 if (vect_print_dump_info (REPORT_SLP
))
488 fprintf (vect_dump
, "Build SLP for ");
489 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
492 /* Fail to vectorize statements marked as unvectorizable. */
493 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
495 if (vect_print_dump_info (REPORT_SLP
))
498 "Build SLP failed: unvectorizable statement ");
499 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
502 vect_free_oprnd_info (&oprnds_info
, true);
506 lhs
= gimple_get_lhs (stmt
);
507 if (lhs
== NULL_TREE
)
509 if (vect_print_dump_info (REPORT_SLP
))
512 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
513 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
516 vect_free_oprnd_info (&oprnds_info
, true);
520 if (is_gimple_assign (stmt
)
521 && gimple_assign_rhs_code (stmt
) == COND_EXPR
522 && (cond
= gimple_assign_rhs1 (stmt
))
523 && !COMPARISON_CLASS_P (cond
))
525 if (vect_print_dump_info (REPORT_SLP
))
528 "Build SLP failed: condition is not comparison ");
529 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
532 vect_free_oprnd_info (&oprnds_info
, true);
536 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
537 vectype
= get_vectype_for_scalar_type (scalar_type
);
540 if (vect_print_dump_info (REPORT_SLP
))
542 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
543 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
546 vect_free_oprnd_info (&oprnds_info
, true);
550 /* In case of multiple types we need to detect the smallest type. */
551 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
553 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
555 vectorization_factor
= *max_nunits
;
558 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
560 if (is_gimple_call (stmt
))
561 rhs_code
= CALL_EXPR
;
563 rhs_code
= gimple_assign_rhs_code (stmt
);
565 /* Check the operation. */
568 first_stmt_code
= rhs_code
;
570 /* Shift arguments should be equal in all the packed stmts for a
571 vector shift with scalar shift operand. */
572 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
573 || rhs_code
== LROTATE_EXPR
574 || rhs_code
== RROTATE_EXPR
)
576 vec_mode
= TYPE_MODE (vectype
);
578 /* First see if we have a vector/vector shift. */
579 optab
= optab_for_tree_code (rhs_code
, vectype
,
583 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
585 /* No vector/vector shift, try for a vector/scalar shift. */
586 optab
= optab_for_tree_code (rhs_code
, vectype
,
591 if (vect_print_dump_info (REPORT_SLP
))
592 fprintf (vect_dump
, "Build SLP failed: no optab.");
593 vect_free_oprnd_info (&oprnds_info
, true);
596 icode
= (int) optab_handler (optab
, vec_mode
);
597 if (icode
== CODE_FOR_nothing
)
599 if (vect_print_dump_info (REPORT_SLP
))
600 fprintf (vect_dump
, "Build SLP failed: "
601 "op not supported by target.");
602 vect_free_oprnd_info (&oprnds_info
, true);
605 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
606 if (!VECTOR_MODE_P (optab_op2_mode
))
608 need_same_oprnds
= true;
609 first_op1
= gimple_assign_rhs2 (stmt
);
613 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
615 need_same_oprnds
= true;
616 first_op1
= gimple_assign_rhs2 (stmt
);
621 if (first_stmt_code
!= rhs_code
622 && (first_stmt_code
!= IMAGPART_EXPR
623 || rhs_code
!= REALPART_EXPR
)
624 && (first_stmt_code
!= REALPART_EXPR
625 || rhs_code
!= IMAGPART_EXPR
)
626 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
))
627 && (first_stmt_code
== ARRAY_REF
628 || first_stmt_code
== INDIRECT_REF
629 || first_stmt_code
== COMPONENT_REF
630 || first_stmt_code
== MEM_REF
)))
632 if (vect_print_dump_info (REPORT_SLP
))
635 "Build SLP failed: different operation in stmt ");
636 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
639 vect_free_oprnd_info (&oprnds_info
, true);
644 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
646 if (vect_print_dump_info (REPORT_SLP
))
649 "Build SLP failed: different shift arguments in ");
650 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
653 vect_free_oprnd_info (&oprnds_info
, true);
658 /* Strided store or load. */
659 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
661 if (REFERENCE_CLASS_P (lhs
))
664 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
665 stmt
, ncopies_for_cost
,
666 (i
== 0), &oprnds_info
))
668 vect_free_oprnd_info (&oprnds_info
, true);
675 /* FORNOW: Check that there is no gap between the loads. */
676 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
677 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
678 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
679 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
681 if (vect_print_dump_info (REPORT_SLP
))
683 fprintf (vect_dump
, "Build SLP failed: strided "
685 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
688 vect_free_oprnd_info (&oprnds_info
, true);
692 /* Check that the size of interleaved loads group is not
693 greater than the SLP group size. */
695 && GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
697 if (vect_print_dump_info (REPORT_SLP
))
699 fprintf (vect_dump
, "Build SLP failed: the number of "
700 "interleaved loads is greater than"
701 " the SLP group size ");
702 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
705 vect_free_oprnd_info (&oprnds_info
, true);
709 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
712 /* Check that there are no loads from different interleaving
713 chains in the same node. The only exception is complex
715 if (prev_first_load
!= first_load
716 && rhs_code
!= REALPART_EXPR
717 && rhs_code
!= IMAGPART_EXPR
)
719 if (vect_print_dump_info (REPORT_SLP
))
721 fprintf (vect_dump
, "Build SLP failed: different "
722 "interleaving chains in one node ");
723 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
726 vect_free_oprnd_info (&oprnds_info
, true);
731 prev_first_load
= first_load
;
733 if (first_load
== stmt
)
735 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
736 if (vect_supportable_dr_alignment (first_dr
, false)
737 == dr_unaligned_unsupported
)
739 if (vect_print_dump_info (REPORT_SLP
))
741 fprintf (vect_dump
, "Build SLP failed: unsupported "
743 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
746 vect_free_oprnd_info (&oprnds_info
, true);
750 /* Analyze costs (for the first stmt in the group). */
751 vect_model_load_cost (vinfo_for_stmt (stmt
),
752 ncopies_for_cost
, false, *node
);
755 /* Store the place of this load in the interleaving chain. In
756 case that permutation is needed we later decide if a specific
757 permutation is supported. */
758 load_place
= vect_get_place_in_interleaving_chain (stmt
,
763 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
765 /* We stop the tree when we reach a group of loads. */
766 stop_recursion
= true;
769 } /* Strided access. */
772 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
774 /* Not strided load. */
775 if (vect_print_dump_info (REPORT_SLP
))
777 fprintf (vect_dump
, "Build SLP failed: not strided load ");
778 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
781 /* FORNOW: Not strided loads are not supported. */
782 vect_free_oprnd_info (&oprnds_info
, true);
786 /* Not memory operation. */
787 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
788 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
789 && rhs_code
!= COND_EXPR
)
791 if (vect_print_dump_info (REPORT_SLP
))
793 fprintf (vect_dump
, "Build SLP failed: operation");
794 fprintf (vect_dump
, " unsupported ");
795 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
798 vect_free_oprnd_info (&oprnds_info
, true);
802 if (rhs_code
== COND_EXPR
)
804 tree cond_expr
= gimple_assign_rhs1 (stmt
);
807 first_cond_code
= TREE_CODE (cond_expr
);
808 else if (first_cond_code
!= TREE_CODE (cond_expr
))
810 if (vect_print_dump_info (REPORT_SLP
))
812 fprintf (vect_dump
, "Build SLP failed: different"
814 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
817 vect_free_oprnd_info (&oprnds_info
, true);
822 /* Find the def-stmts. */
823 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
824 ncopies_for_cost
, (i
== 0),
827 vect_free_oprnd_info (&oprnds_info
, true);
833 /* Add the costs of the node to the overall instance costs. */
834 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
835 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
837 /* Strided loads were reached - stop the recursion. */
840 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
844 *loads_permuted
= true;
846 += targetm
.vectorize
.builtin_vectorization_cost (vec_perm
, NULL
, 0)
851 /* We don't check here complex numbers chains, so we set
852 LOADS_PERMUTED for further check in
853 vect_supported_load_permutation_p. */
854 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
855 *loads_permuted
= true;
861 /* Create SLP_TREE nodes for the definition node/s. */
862 FOR_EACH_VEC_ELT (slp_oprnd_info
, oprnds_info
, i
, oprnd_info
)
866 if (oprnd_info
->first_dt
!= vect_internal_def
)
869 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
871 || !vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
, group_size
,
872 inside_cost
, outside_cost
, ncopies_for_cost
,
873 max_nunits
, load_permutation
, loads
,
874 vectorization_factor
, loads_permuted
))
877 vect_free_oprnd_info (&oprnds_info
, true);
881 VEC_quick_push (slp_void_p
, SLP_TREE_CHILDREN (*node
), child
);
884 vect_free_oprnd_info (&oprnds_info
, false);
890 vect_print_slp_tree (slp_tree node
)
899 fprintf (vect_dump
, "node ");
900 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
902 fprintf (vect_dump
, "\n\tstmt %d ", i
);
903 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
905 fprintf (vect_dump
, "\n");
907 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
908 vect_print_slp_tree ((slp_tree
) child
);
912 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
913 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
914 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
915 stmts in NODE are to be marked. */
918 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
927 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
929 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
931 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
932 vect_mark_slp_stmts ((slp_tree
) child
, mark
, j
);
936 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
939 vect_mark_slp_stmts_relevant (slp_tree node
)
943 stmt_vec_info stmt_info
;
949 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
951 stmt_info
= vinfo_for_stmt (stmt
);
952 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
953 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
954 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
957 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
958 vect_mark_slp_stmts_relevant ((slp_tree
) child
);
962 /* Check if the permutation required by the SLP INSTANCE is supported.
963 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
966 vect_supported_slp_permutation_p (slp_instance instance
)
968 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
969 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
970 gimple first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
971 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
973 slp_tree
*tmp_loads
= NULL
;
974 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
977 /* FORNOW: The only supported loads permutation is loads from the same
978 location in all the loads in the node, when the data-refs in
979 nodes of LOADS constitute an interleaving chain.
980 Sort the nodes according to the order of accesses in the chain. */
981 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
983 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
984 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
985 i
+= group_size
, j
++)
987 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
988 /* Check that the loads are all in the same interleaving chain. */
989 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt
)) != first_load
)
991 if (vect_print_dump_info (REPORT_DETAILS
))
993 fprintf (vect_dump
, "Build SLP failed: unsupported data "
995 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
1002 tmp_loads
[index
] = load
;
1005 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1006 for (i
= 0; i
< group_size
; i
++)
1007 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
1009 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
1010 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
1013 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
1014 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
1022 /* Rearrange the statements of NODE according to PERMUTATION. */
1025 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1026 VEC (int, heap
) *permutation
)
1029 VEC (gimple
, heap
) *tmp_stmts
;
1030 unsigned int index
, i
;
1036 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1037 vect_slp_rearrange_stmts ((slp_tree
) child
, group_size
, permutation
);
1039 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
1040 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1042 for (i
= 0; i
< group_size
; i
++)
1043 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
1045 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1047 index
= VEC_index (int, permutation
, i
);
1048 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
1051 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
1052 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1056 /* Check if the required load permutation is supported.
1057 LOAD_PERMUTATION contains a list of indices of the loads.
1058 In SLP this permutation is relative to the order of strided stores that are
1059 the base of the SLP instance. */
1062 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
1063 VEC (int, heap
) *load_permutation
)
1065 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
1066 bool supported
, bad_permutation
= false;
1068 slp_tree node
, other_complex_node
;
1069 gimple stmt
, first
= NULL
, other_node_first
, load
, next_load
, first_load
;
1070 unsigned complex_numbers
= 0;
1071 struct data_reference
*dr
;
1072 bb_vec_info bb_vinfo
;
1074 /* FORNOW: permutations are only supported in SLP. */
1078 if (vect_print_dump_info (REPORT_SLP
))
1080 fprintf (vect_dump
, "Load permutation ");
1081 FOR_EACH_VEC_ELT (int, load_permutation
, i
, next
)
1082 fprintf (vect_dump
, "%d ", next
);
1085 /* In case of reduction every load permutation is allowed, since the order
1086 of the reduction statements is not important (as opposed to the case of
1087 strided stores). The only condition we need to check is that all the
1088 load nodes are of the same size and have the same permutation (and then
1089 rearrange all the nodes of the SLP instance according to this
1092 /* Check that all the load nodes are of the same size. */
1093 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1095 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
1096 != (unsigned) group_size
)
1099 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1100 if (is_gimple_assign (stmt
)
1101 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
1102 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
1106 /* Complex operands can be swapped as following:
1107 real_c = real_b + real_a;
1108 imag_c = imag_a + imag_b;
1109 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1110 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1111 chains are mixed, they match the above pattern. */
1112 if (complex_numbers
)
1114 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1116 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, stmt
)
1122 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != first
)
1124 if (complex_numbers
!= 2)
1132 other_complex_node
= VEC_index (slp_tree
,
1133 SLP_INSTANCE_LOADS (slp_instn
), k
);
1134 other_node_first
= VEC_index (gimple
,
1135 SLP_TREE_SCALAR_STMTS (other_complex_node
), 0);
1137 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1138 != other_node_first
)
1146 /* We checked that this case ok, so there is no need to proceed with
1147 permutation tests. */
1148 if (complex_numbers
== 2)
1150 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (slp_instn
));
1151 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1155 node
= SLP_INSTANCE_TREE (slp_instn
);
1156 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1157 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1158 instance, not all the loads belong to the same node or interleaving
1159 group. Hence, we need to divide them into groups according to
1161 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
1163 /* Reduction (there are no data-refs in the root).
1164 In reduction chain the order of the loads is important. */
1165 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1166 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1168 int first_group_load_index
;
1170 /* Compare all the permutation sequences to the first one. */
1171 for (i
= 1; i
< number_of_groups
; i
++)
1174 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
1176 next
= VEC_index (int, load_permutation
, j
);
1177 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1179 if (next
!= first_group_load_index
)
1181 bad_permutation
= true;
1188 if (bad_permutation
)
1192 if (!bad_permutation
)
1194 /* Check that the loads in the first sequence are different and there
1195 are no gaps between them. */
1196 load_index
= sbitmap_alloc (group_size
);
1197 sbitmap_zero (load_index
);
1198 for (k
= 0; k
< group_size
; k
++)
1200 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1201 if (TEST_BIT (load_index
, first_group_load_index
))
1203 bad_permutation
= true;
1207 SET_BIT (load_index
, first_group_load_index
);
1210 if (!bad_permutation
)
1211 for (k
= 0; k
< group_size
; k
++)
1212 if (!TEST_BIT (load_index
, k
))
1214 bad_permutation
= true;
1218 sbitmap_free (load_index
);
1221 if (!bad_permutation
)
1223 /* This permutation is valid for reduction. Since the order of the
1224 statements in the nodes is not important unless they are memory
1225 accesses, we can rearrange the statements in all the nodes
1226 according to the order of the loads. */
1227 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1229 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1234 /* In basic block vectorization we allow any subchain of an interleaving
1236 FORNOW: not supported in loop SLP because of realignment compications. */
1237 bb_vinfo
= STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
));
1238 bad_permutation
= false;
1239 /* Check that for every node in the instance teh loads form a subchain. */
1242 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1246 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1249 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
));
1251 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
)))
1253 bad_permutation
= true;
1257 if (j
!= 0 && next_load
!= load
)
1259 bad_permutation
= true;
1263 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1266 if (bad_permutation
)
1270 /* Check that the alignment of the first load in every subchain, i.e.,
1271 the first statement in every load node, is supported. */
1272 if (!bad_permutation
)
1274 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1276 first_load
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1278 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1280 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1281 if (vect_supportable_dr_alignment (dr
, false)
1282 == dr_unaligned_unsupported
)
1284 if (vect_print_dump_info (REPORT_SLP
))
1286 fprintf (vect_dump
, "unsupported unaligned load ");
1287 print_gimple_stmt (vect_dump
, first_load
, 0,
1290 bad_permutation
= true;
1296 if (!bad_permutation
)
1298 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1304 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1305 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1306 well (unless it's reduction). */
1307 if (VEC_length (int, load_permutation
)
1308 != (unsigned int) (group_size
* group_size
))
1312 load_index
= sbitmap_alloc (group_size
);
1313 sbitmap_zero (load_index
);
1314 for (j
= 0; j
< group_size
; j
++)
1316 for (i
= j
* group_size
, k
= 0;
1317 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
1320 if (i
!= j
* group_size
&& next
!= prev
)
1329 if (TEST_BIT (load_index
, prev
))
1335 SET_BIT (load_index
, prev
);
1338 for (j
= 0; j
< group_size
; j
++)
1339 if (!TEST_BIT (load_index
, j
))
1342 sbitmap_free (load_index
);
1344 if (supported
&& i
== group_size
* group_size
1345 && vect_supported_slp_permutation_p (slp_instn
))
1352 /* Find the first load in the loop that belongs to INSTANCE.
1353 When loads are in several SLP nodes, there can be a case in which the first
1354 load does not appear in the first SLP node to be transformed, causing
1355 incorrect order of statements. Since we generate all the loads together,
1356 they must be inserted before the first load of the SLP instance and not
1357 before the first load of the first node of the instance. */
1360 vect_find_first_load_in_slp_instance (slp_instance instance
)
1364 gimple first_load
= NULL
, load
;
1366 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1367 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1368 first_load
= get_earlier_stmt (load
, first_load
);
1374 /* Find the last store in SLP INSTANCE. */
1377 vect_find_last_store_in_slp_instance (slp_instance instance
)
1381 gimple last_store
= NULL
, store
;
1383 node
= SLP_INSTANCE_TREE (instance
);
1385 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, store
);
1387 last_store
= get_later_stmt (store
, last_store
);
1393 /* Analyze an SLP instance starting from a group of strided stores. Call
1394 vect_build_slp_tree to build a tree of packed stmts if possible.
1395 Return FALSE if it's impossible to SLP any stmt in the loop. */
1398 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1401 slp_instance new_instance
;
1403 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1404 unsigned int unrolling_factor
= 1, nunits
;
1405 tree vectype
, scalar_type
= NULL_TREE
;
1407 unsigned int vectorization_factor
= 0;
1408 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1409 unsigned int max_nunits
= 0;
1410 VEC (int, heap
) *load_permutation
;
1411 VEC (slp_tree
, heap
) *loads
;
1412 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1413 bool loads_permuted
= false;
1414 VEC (gimple
, heap
) *scalar_stmts
;
1416 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1420 scalar_type
= TREE_TYPE (DR_REF (dr
));
1421 vectype
= get_vectype_for_scalar_type (scalar_type
);
1425 gcc_assert (loop_vinfo
);
1426 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1429 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1433 gcc_assert (loop_vinfo
);
1434 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1435 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1440 if (vect_print_dump_info (REPORT_SLP
))
1442 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1443 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1449 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1451 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1453 vectorization_factor
= nunits
;
1455 /* Calculate the unrolling factor. */
1456 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1457 if (unrolling_factor
!= 1 && !loop_vinfo
)
1459 if (vect_print_dump_info (REPORT_SLP
))
1460 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1466 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1467 scalar_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1469 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1471 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1474 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1475 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1476 VEC_safe_push (gimple
, heap
, scalar_stmts
,
1477 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1479 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1480 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1485 /* Collect reduction statements. */
1486 VEC (gimple
, heap
) *reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1487 for (i
= 0; VEC_iterate (gimple
, reductions
, i
, next
); i
++)
1488 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1491 node
= vect_create_new_slp_node (scalar_stmts
);
1493 /* Calculate the number of vector stmts to create based on the unrolling
1494 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1495 GROUP_SIZE / NUNITS otherwise. */
1496 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1498 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1499 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1501 /* Build the tree for the SLP instance. */
1502 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1503 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1504 &max_nunits
, &load_permutation
, &loads
,
1505 vectorization_factor
, &loads_permuted
))
1507 /* Calculate the unrolling factor based on the smallest type. */
1508 if (max_nunits
> nunits
)
1509 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1512 if (unrolling_factor
!= 1 && !loop_vinfo
)
1514 if (vect_print_dump_info (REPORT_SLP
))
1515 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1520 /* Create a new SLP instance. */
1521 new_instance
= XNEW (struct _slp_instance
);
1522 SLP_INSTANCE_TREE (new_instance
) = node
;
1523 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1524 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1525 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1526 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1527 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1528 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1529 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1533 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1536 if (vect_print_dump_info (REPORT_SLP
))
1538 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1540 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1543 vect_free_slp_instance (new_instance
);
1547 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1548 = vect_find_first_load_in_slp_instance (new_instance
);
1551 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1554 VEC_safe_push (slp_instance
, heap
,
1555 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1558 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1561 if (vect_print_dump_info (REPORT_SLP
))
1562 vect_print_slp_tree (node
);
1567 /* Failed to SLP. */
1568 /* Free the allocated memory. */
1569 vect_free_slp_tree (node
);
1570 VEC_free (int, heap
, load_permutation
);
1571 VEC_free (slp_tree
, heap
, loads
);
1577 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1578 trees of packed scalar stmts if SLP is possible. */
1581 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1584 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
, *reduc_chains
= NULL
;
1585 gimple first_element
;
1588 if (vect_print_dump_info (REPORT_SLP
))
1589 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1593 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1594 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1595 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1598 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1600 /* Find SLP sequences starting from groups of strided stores. */
1601 FOR_EACH_VEC_ELT (gimple
, strided_stores
, i
, first_element
)
1602 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1605 if (bb_vinfo
&& !ok
)
1607 if (vect_print_dump_info (REPORT_SLP
))
1608 fprintf (vect_dump
, "Failed to SLP the basic block.");
1614 && VEC_length (gimple
, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
)) > 0)
1616 /* Find SLP sequences starting from reduction chains. */
1617 FOR_EACH_VEC_ELT (gimple
, reduc_chains
, i
, first_element
)
1618 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1623 /* Don't try to vectorize SLP reductions if reduction chain was
1628 /* Find SLP sequences starting from groups of reductions. */
1629 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1630 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1631 VEC_index (gimple
, reductions
, 0)))
1638 /* For each possible SLP instance decide whether to SLP it and calculate overall
1639 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1640 least one instance. */
1643 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1645 unsigned int i
, unrolling_factor
= 1;
1646 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1647 slp_instance instance
;
1648 int decided_to_slp
= 0;
1650 if (vect_print_dump_info (REPORT_SLP
))
1651 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1653 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1655 /* FORNOW: SLP if you can. */
1656 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1657 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1659 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1660 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1661 loop-based vectorization. Such stmts will be marked as HYBRID. */
1662 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1666 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1668 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1669 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1670 decided_to_slp
, unrolling_factor
);
1672 return (decided_to_slp
> 0);
1676 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1677 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1680 vect_detect_hybrid_slp_stmts (slp_tree node
)
1684 imm_use_iterator imm_iter
;
1686 stmt_vec_info stmt_vinfo
;
1692 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1693 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1694 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1695 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1696 if ((stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1697 && !STMT_SLP_TYPE (stmt_vinfo
)
1698 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1699 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1700 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1701 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt
))
1702 == vect_reduction_def
))
1703 vect_mark_slp_stmts (node
, hybrid
, i
);
1705 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1706 vect_detect_hybrid_slp_stmts ((slp_tree
) child
);
1710 /* Find stmts that must be both vectorized and SLPed. */
1713 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1716 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1717 slp_instance instance
;
1719 if (vect_print_dump_info (REPORT_SLP
))
1720 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1722 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1723 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1727 /* Create and initialize a new bb_vec_info struct for BB, as well as
1728 stmt_vec_info structs for all the stmts in it. */
1731 new_bb_vec_info (basic_block bb
)
1733 bb_vec_info res
= NULL
;
1734 gimple_stmt_iterator gsi
;
1736 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1737 BB_VINFO_BB (res
) = bb
;
1739 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1741 gimple stmt
= gsi_stmt (gsi
);
1742 gimple_set_uid (stmt
, 0);
1743 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1746 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1747 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1754 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1755 stmts in the basic block. */
1758 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1761 gimple_stmt_iterator si
;
1766 bb
= BB_VINFO_BB (bb_vinfo
);
1768 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1770 gimple stmt
= gsi_stmt (si
);
1771 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1774 /* Free stmt_vec_info. */
1775 free_stmt_vec_info (stmt
);
1778 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo
));
1779 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1780 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1781 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1787 /* Analyze statements contained in SLP tree node after recursively analyzing
1788 the subtree. Return TRUE if the operations are supported. */
1791 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1801 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1802 if (!vect_slp_analyze_node_operations (bb_vinfo
, (slp_tree
) child
))
1805 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1807 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1808 gcc_assert (stmt_info
);
1809 gcc_assert (PURE_SLP_STMT (stmt_info
));
1811 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1819 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1820 operations are supported. */
1823 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1825 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1826 slp_instance instance
;
1829 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1831 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1832 SLP_INSTANCE_TREE (instance
)))
1834 vect_free_slp_instance (instance
);
1835 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1841 if (!VEC_length (slp_instance
, slp_instances
))
1847 /* Check if vectorization of the basic block is profitable. */
1850 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
1852 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1853 slp_instance instance
;
1855 unsigned int vec_outside_cost
= 0, vec_inside_cost
= 0, scalar_cost
= 0;
1856 unsigned int stmt_cost
;
1858 gimple_stmt_iterator si
;
1859 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
1860 stmt_vec_info stmt_info
= NULL
;
1861 tree dummy_type
= NULL
;
1864 /* Calculate vector costs. */
1865 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1867 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
1868 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
1871 /* Calculate scalar cost. */
1872 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1874 stmt
= gsi_stmt (si
);
1875 stmt_info
= vinfo_for_stmt (stmt
);
1877 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
)
1878 || !PURE_SLP_STMT (stmt_info
))
1881 if (STMT_VINFO_DATA_REF (stmt_info
))
1883 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1884 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1885 (scalar_load
, dummy_type
, dummy
);
1887 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1888 (scalar_store
, dummy_type
, dummy
);
1891 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1892 (scalar_stmt
, dummy_type
, dummy
);
1894 scalar_cost
+= stmt_cost
;
1897 if (vect_print_dump_info (REPORT_COST
))
1899 fprintf (vect_dump
, "Cost model analysis: \n");
1900 fprintf (vect_dump
, " Vector inside of basic block cost: %d\n",
1902 fprintf (vect_dump
, " Vector outside of basic block cost: %d\n",
1904 fprintf (vect_dump
, " Scalar cost of basic block: %d", scalar_cost
);
1907 /* Vectorization is profitable if its cost is less than the cost of scalar
1909 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
1915 /* Check if the basic block can be vectorized. */
1918 vect_slp_analyze_bb_1 (basic_block bb
)
1920 bb_vec_info bb_vinfo
;
1921 VEC (ddr_p
, heap
) *ddrs
;
1922 VEC (slp_instance
, heap
) *slp_instances
;
1923 slp_instance instance
;
1926 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1928 bb_vinfo
= new_bb_vec_info (bb
);
1932 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
1934 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1935 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
1938 destroy_bb_vec_info (bb_vinfo
);
1942 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
1943 if (!VEC_length (ddr_p
, ddrs
))
1945 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1946 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
1949 destroy_bb_vec_info (bb_vinfo
);
1953 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
)
1956 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1957 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
1958 "in basic block.\n");
1960 destroy_bb_vec_info (bb_vinfo
);
1964 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
1966 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1967 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
1970 destroy_bb_vec_info (bb_vinfo
);
1974 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
1976 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1977 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
1980 destroy_bb_vec_info (bb_vinfo
);
1984 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
1986 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1987 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
1990 destroy_bb_vec_info (bb_vinfo
);
1994 /* Check the SLP opportunities in the basic block, analyze and build SLP
1996 if (!vect_analyze_slp (NULL
, bb_vinfo
))
1998 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1999 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
2000 "in basic block.\n");
2002 destroy_bb_vec_info (bb_vinfo
);
2006 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2008 /* Mark all the statements that we want to vectorize as pure SLP and
2010 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2012 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2013 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2016 if (!vect_slp_analyze_operations (bb_vinfo
))
2018 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2019 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
2021 destroy_bb_vec_info (bb_vinfo
);
2025 /* Cost model: check if the vectorization is worthwhile. */
2026 if (flag_vect_cost_model
2027 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2029 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2030 fprintf (vect_dump
, "not vectorized: vectorization is not "
2033 destroy_bb_vec_info (bb_vinfo
);
2037 if (vect_print_dump_info (REPORT_DETAILS
))
2038 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
2045 vect_slp_analyze_bb (basic_block bb
)
2047 bb_vec_info bb_vinfo
;
2049 gimple_stmt_iterator gsi
;
2050 unsigned int vector_sizes
;
2052 if (vect_print_dump_info (REPORT_DETAILS
))
2053 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
2055 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2057 gimple stmt
= gsi_stmt (gsi
);
2058 if (!is_gimple_debug (stmt
)
2059 && !gimple_nop_p (stmt
)
2060 && gimple_code (stmt
) != GIMPLE_LABEL
)
2064 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2067 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
2073 /* Autodetect first vector size we try. */
2074 current_vector_size
= 0;
2075 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2079 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2083 destroy_bb_vec_info (bb_vinfo
);
2085 vector_sizes
&= ~current_vector_size
;
2086 if (vector_sizes
== 0
2087 || current_vector_size
== 0)
2090 /* Try the next biggest vector size. */
2091 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2092 if (vect_print_dump_info (REPORT_DETAILS
))
2093 fprintf (vect_dump
, "***** Re-trying analysis with "
2094 "vector size %d\n", current_vector_size
);
2099 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2100 the number of created vector stmts depends on the unrolling factor).
2101 However, the actual number of vector stmts for every SLP node depends on
2102 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2103 should be updated. In this function we assume that the inside costs
2104 calculated in vect_model_xxx_cost are linear in ncopies. */
2107 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2109 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2110 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2111 slp_instance instance
;
2113 if (vect_print_dump_info (REPORT_SLP
))
2114 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
2116 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2117 /* We assume that costs are linear in ncopies. */
2118 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
2119 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2123 /* For constant and loop invariant defs of SLP_NODE this function returns
2124 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2125 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2126 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2127 REDUC_INDEX is the index of the reduction operand in the statements, unless
2131 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2132 VEC (tree
, heap
) **vec_oprnds
,
2133 unsigned int op_num
, unsigned int number_of_vectors
,
2136 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2137 gimple stmt
= VEC_index (gimple
, stmts
, 0);
2138 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2142 int j
, number_of_places_left_in_vector
;
2145 int group_size
= VEC_length (gimple
, stmts
);
2146 unsigned int vec_num
, i
;
2147 int number_of_copies
= 1;
2148 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
2149 bool constant_p
, is_store
;
2150 tree neutral_op
= NULL
;
2151 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2155 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2156 && reduc_index
!= -1)
2158 op_num
= reduc_index
- 1;
2159 op
= gimple_op (stmt
, reduc_index
);
2160 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2161 we need either neutral operands or the original operands. See
2162 get_initial_def_for_reduction() for details. */
2165 case WIDEN_SUM_EXPR
:
2171 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2172 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2174 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2179 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2180 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2182 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2187 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2192 def_stmt
= SSA_NAME_DEF_STMT (op
);
2193 loop
= (gimple_bb (stmt
))->loop_father
;
2194 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2195 loop_preheader_edge (loop
));
2203 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2206 op
= gimple_assign_rhs1 (stmt
);
2213 if (CONSTANT_CLASS_P (op
))
2218 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2219 gcc_assert (vector_type
);
2220 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2222 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2223 created vectors. It is greater than 1 if unrolling is performed.
2225 For example, we have two scalar operands, s1 and s2 (e.g., group of
2226 strided accesses of size two), while NUNITS is four (i.e., four scalars
2227 of this type can be packed in a vector). The output vector will contain
2228 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2231 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2232 containing the operands.
2234 For example, NUNITS is four as before, and the group size is 8
2235 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2236 {s5, s6, s7, s8}. */
2238 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2240 number_of_places_left_in_vector
= nunits
;
2241 for (j
= 0; j
< number_of_copies
; j
++)
2243 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
2246 op
= gimple_assign_rhs1 (stmt
);
2247 else if (gimple_assign_rhs_code (stmt
) != COND_EXPR
)
2248 op
= gimple_op (stmt
, op_num
+ 1);
2251 if (op_num
== 0 || op_num
== 1)
2253 tree cond
= gimple_assign_rhs1 (stmt
);
2254 op
= TREE_OPERAND (cond
, op_num
);
2259 op
= gimple_assign_rhs2 (stmt
);
2261 op
= gimple_assign_rhs3 (stmt
);
2265 if (reduc_index
!= -1)
2267 loop
= (gimple_bb (stmt
))->loop_father
;
2268 def_stmt
= SSA_NAME_DEF_STMT (op
);
2272 /* Get the def before the loop. In reduction chain we have only
2273 one initial value. */
2274 if ((j
!= (number_of_copies
- 1)
2275 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2280 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2281 loop_preheader_edge (loop
));
2284 /* Create 'vect_ = {op0,op1,...,opn}'. */
2285 t
= tree_cons (NULL_TREE
, op
, t
);
2287 number_of_places_left_in_vector
--;
2289 if (number_of_places_left_in_vector
== 0)
2291 number_of_places_left_in_vector
= nunits
;
2294 vec_cst
= build_vector (vector_type
, t
);
2296 vec_cst
= build_constructor_from_list (vector_type
, t
);
2297 VEC_quick_push (tree
, voprnds
,
2298 vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
));
2304 /* Since the vectors are created in the reverse order, we should invert
2306 vec_num
= VEC_length (tree
, voprnds
);
2307 for (j
= vec_num
- 1; j
>= 0; j
--)
2309 vop
= VEC_index (tree
, voprnds
, j
);
2310 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2313 VEC_free (tree
, heap
, voprnds
);
2315 /* In case that VF is greater than the unrolling factor needed for the SLP
2316 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2317 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2318 to replicate the vectors. */
2319 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
2321 tree neutral_vec
= NULL
;
2326 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2328 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
2332 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
2333 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2339 /* Get vectorized definitions from SLP_NODE that contains corresponding
2340 vectorized def-stmts. */
2343 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
2346 gimple vec_def_stmt
;
2349 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
2351 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2353 gcc_assert (vec_def_stmt
);
2354 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2355 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2360 /* Get vectorized definitions for SLP_NODE.
2361 If the scalar definitions are loop invariants or constants, collect them and
2362 call vect_get_constant_vectors() to create vector stmts.
2363 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2364 must be stored in the corresponding child of SLP_NODE, and we call
2365 vect_get_slp_vect_defs () to retrieve them. */
2368 vect_get_slp_defs (VEC (tree
, heap
) *ops
, slp_tree slp_node
,
2369 VEC (slp_void_p
, heap
) **vec_oprnds
, int reduc_index
)
2371 gimple first_stmt
, first_def
;
2372 int number_of_vects
= 0, i
;
2373 unsigned int child_index
= 0;
2374 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2375 slp_tree child
= NULL
;
2376 VEC (tree
, heap
) *vec_defs
;
2377 tree oprnd
, def_lhs
;
2378 bool vectorized_defs
;
2380 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
2381 FOR_EACH_VEC_ELT (tree
, ops
, i
, oprnd
)
2383 /* For each operand we check if it has vectorized definitions in a child
2384 node or we need to create them (for invariants and constants). We
2385 check if the LHS of the first stmt of the next child matches OPRND.
2386 If it does, we found the correct child. Otherwise, we call
2387 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2388 to check this child node for the next operand. */
2389 vectorized_defs
= false;
2390 if (VEC_length (slp_void_p
, SLP_TREE_CHILDREN (slp_node
)) > child_index
)
2392 child
= (slp_tree
) VEC_index (slp_void_p
,
2393 SLP_TREE_CHILDREN (slp_node
),
2395 first_def
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (child
), 0);
2397 /* In the end of a pattern sequence we have a use of the original stmt,
2398 so we need to compare OPRND with the original def. */
2399 if (is_pattern_stmt_p (vinfo_for_stmt (first_def
))
2400 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt
))
2401 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt
)))
2402 first_def
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2404 if (is_gimple_call (first_def
))
2405 def_lhs
= gimple_call_lhs (first_def
);
2407 def_lhs
= gimple_assign_lhs (first_def
);
2409 if (operand_equal_p (oprnd
, def_lhs
, 0))
2411 /* The number of vector defs is determined by the number of
2412 vector statements in the node from which we get those
2414 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2415 vectorized_defs
= true;
2420 if (!vectorized_defs
)
2424 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2425 /* Number of vector stmts was calculated according to LHS in
2426 vect_schedule_slp_instance (), fix it by replacing LHS with
2427 RHS, if necessary. See vect_get_smallest_scalar_type () for
2429 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2431 if (rhs_size_unit
!= lhs_size_unit
)
2433 number_of_vects
*= rhs_size_unit
;
2434 number_of_vects
/= lhs_size_unit
;
2439 /* Allocate memory for vectorized defs. */
2440 vec_defs
= VEC_alloc (tree
, heap
, number_of_vects
);
2442 /* For reduction defs we call vect_get_constant_vectors (), since we are
2443 looking for initial loop invariant values. */
2444 if (vectorized_defs
&& reduc_index
== -1)
2445 /* The defs are already vectorized. */
2446 vect_get_slp_vect_defs (child
, &vec_defs
);
2448 /* Build vectors from scalar defs. */
2449 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2450 number_of_vects
, reduc_index
);
2452 VEC_quick_push (slp_void_p
, *vec_oprnds
, (slp_void_p
) vec_defs
);
2454 /* For reductions, we only need initial values. */
2455 if (reduc_index
!= -1)
2461 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2462 building a vector of type MASK_TYPE from it) and two input vectors placed in
2463 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2464 shifting by STRIDE elements of DR_CHAIN for every copy.
2465 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2467 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2468 the created stmts must be inserted. */
2471 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2472 tree mask
, int first_vec_indx
, int second_vec_indx
,
2473 gimple_stmt_iterator
*gsi
, slp_tree node
,
2474 tree vectype
, VEC(tree
,heap
) *dr_chain
,
2475 int ncopies
, int vect_stmts_counter
)
2478 gimple perm_stmt
= NULL
;
2479 stmt_vec_info next_stmt_info
;
2481 tree first_vec
, second_vec
, data_ref
;
2483 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2485 /* Initialize the vect stmts of NODE to properly insert the generated
2487 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
2488 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2489 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
2491 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2492 for (i
= 0; i
< ncopies
; i
++)
2494 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
2495 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
2497 /* Generate the permute statement. */
2498 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, perm_dest
,
2499 first_vec
, second_vec
, mask
);
2500 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2501 gimple_set_lhs (perm_stmt
, data_ref
);
2502 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2504 /* Store the vector statement in NODE. */
2505 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
2506 stride
* i
+ vect_stmts_counter
, perm_stmt
);
2508 first_vec_indx
+= stride
;
2509 second_vec_indx
+= stride
;
2512 /* Mark the scalar stmt as vectorized. */
2513 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2514 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2518 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2519 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2520 representation. Check that the mask is valid and return FALSE if not.
2521 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2522 the next vector, i.e., the current first vector is not needed. */
2525 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2526 int mask_nunits
, bool only_one_vec
, int index
,
2527 unsigned char *mask
, int *current_mask_element
,
2528 bool *need_next_vector
, int *number_of_mask_fixes
,
2529 bool *mask_fixed
, bool *needs_first_vector
)
2533 /* Convert to target specific representation. */
2534 *current_mask_element
= first_mask_element
+ m
;
2535 /* Adjust the value in case it's a mask for second and third vectors. */
2536 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2538 if (*current_mask_element
< mask_nunits
)
2539 *needs_first_vector
= true;
2541 /* We have only one input vector to permute but the mask accesses values in
2542 the next vector as well. */
2543 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2545 if (vect_print_dump_info (REPORT_DETAILS
))
2547 fprintf (vect_dump
, "permutation requires at least two vectors ");
2548 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2554 /* The mask requires the next vector. */
2555 if (*current_mask_element
>= mask_nunits
* 2)
2557 if (*needs_first_vector
|| *mask_fixed
)
2559 /* We either need the first vector too or have already moved to the
2560 next vector. In both cases, this permutation needs three
2562 if (vect_print_dump_info (REPORT_DETAILS
))
2564 fprintf (vect_dump
, "permutation requires at "
2565 "least three vectors ");
2566 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2572 /* We move to the next vector, dropping the first one and working with
2573 the second and the third - we need to adjust the values of the mask
2575 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2577 for (i
= 0; i
< index
; i
++)
2578 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2580 (*number_of_mask_fixes
)++;
2584 *need_next_vector
= *mask_fixed
;
2586 /* This was the last element of this mask. Start a new one. */
2587 if (index
== mask_nunits
- 1)
2589 *number_of_mask_fixes
= 1;
2590 *mask_fixed
= false;
2591 *needs_first_vector
= false;
2598 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2599 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2600 permute statements for SLP_NODE_INSTANCE. */
2602 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
2603 gimple_stmt_iterator
*gsi
, int vf
,
2604 slp_instance slp_node_instance
, bool analyze_only
)
2606 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2607 tree mask_element_type
= NULL_TREE
, mask_type
;
2608 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2610 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2611 gimple next_scalar_stmt
;
2612 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2613 int first_mask_element
;
2614 int index
, unroll_factor
, current_mask_element
, ncopies
;
2615 unsigned char *mask
;
2616 bool only_one_vec
= false, need_next_vector
= false;
2617 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2618 int number_of_mask_fixes
= 1;
2619 bool mask_fixed
= false;
2620 bool needs_first_vector
= false;
2621 enum machine_mode mode
;
2623 mode
= TYPE_MODE (vectype
);
2625 if (!can_vec_perm_p (mode
, false, NULL
))
2627 if (vect_print_dump_info (REPORT_DETAILS
))
2629 fprintf (vect_dump
, "no vect permute for ");
2630 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2635 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2636 same size as the vector element being permuted. */
2638 = lang_hooks
.types
.type_for_size
2639 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype
))), 1);
2640 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2641 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2642 mask
= XALLOCAVEC (unsigned char, nunits
);
2643 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2645 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2646 unrolling factor. */
2647 orig_vec_stmts_num
= group_size
*
2648 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2649 if (orig_vec_stmts_num
== 1)
2650 only_one_vec
= true;
2652 /* Number of copies is determined by the final vectorization factor
2653 relatively to SLP_NODE_INSTANCE unrolling factor. */
2654 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2656 /* Generate permutation masks for every NODE. Number of masks for each NODE
2657 is equal to GROUP_SIZE.
2658 E.g., we have a group of three nodes with three loads from the same
2659 location in each node, and the vector size is 4. I.e., we have a
2660 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2661 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2662 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2665 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2666 The last mask is illegal since we assume two operands for permute
2667 operation, and the mask element values can't be outside that range.
2668 Hence, the last mask must be converted into {2,5,5,5}.
2669 For the first two permutations we need the first and the second input
2670 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2671 we need the second and the third vectors: {b1,c1,a2,b2} and
2674 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
), i
, node
)
2678 vect_stmts_counter
= 0;
2680 first_vec_index
= vec_index
++;
2682 second_vec_index
= first_vec_index
;
2684 second_vec_index
= vec_index
++;
2686 for (j
= 0; j
< unroll_factor
; j
++)
2688 for (k
= 0; k
< group_size
; k
++)
2690 first_mask_element
= i
+ j
* group_size
;
2691 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2692 nunits
, only_one_vec
, index
,
2693 mask
, ¤t_mask_element
,
2695 &number_of_mask_fixes
, &mask_fixed
,
2696 &needs_first_vector
))
2698 mask
[index
++] = current_mask_element
;
2700 if (index
== nunits
)
2702 tree mask_vec
= NULL
;
2704 if (!can_vec_perm_p (mode
, false, mask
))
2706 if (vect_print_dump_info (REPORT_DETAILS
))
2708 fprintf (vect_dump
, "unsupported vect permute { ");
2709 for (i
= 0; i
< nunits
; ++i
)
2710 fprintf (vect_dump
, "%d ", mask
[i
]);
2711 fprintf (vect_dump
, "}\n");
2716 while (--index
>= 0)
2718 tree t
= build_int_cst (mask_element_type
, mask
[index
]);
2719 mask_vec
= tree_cons (NULL
, t
, mask_vec
);
2721 mask_vec
= build_vector (mask_type
, mask_vec
);
2726 if (need_next_vector
)
2728 first_vec_index
= second_vec_index
;
2729 second_vec_index
= vec_index
;
2732 next_scalar_stmt
= VEC_index (gimple
,
2733 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
2735 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2736 mask_vec
, first_vec_index
, second_vec_index
,
2737 gsi
, node
, vectype
, dr_chain
,
2738 ncopies
, vect_stmts_counter
++);
2750 /* Vectorize SLP instance tree in postorder. */
2753 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2754 unsigned int vectorization_factor
)
2757 bool strided_store
, is_store
;
2758 gimple_stmt_iterator si
;
2759 stmt_vec_info stmt_info
;
2760 unsigned int vec_stmts_size
, nunits
, group_size
;
2763 slp_tree loads_node
;
2769 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2770 vect_schedule_slp_instance ((slp_tree
) child
, instance
,
2771 vectorization_factor
);
2773 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
2774 stmt_info
= vinfo_for_stmt (stmt
);
2776 /* VECTYPE is the type of the destination. */
2777 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2778 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
2779 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2781 /* For each SLP instance calculate number of vector stmts to be created
2782 for the scalar stmts in each node of the SLP tree. Number of vector
2783 elements in one vector iteration is the number of scalar elements in
2784 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2786 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
2788 /* In case of load permutation we have to allocate vectorized statements for
2789 all the nodes that participate in that permutation. */
2790 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2792 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
)
2794 if (!SLP_TREE_VEC_STMTS (loads_node
))
2796 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
2798 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
2803 if (!SLP_TREE_VEC_STMTS (node
))
2805 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
2806 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
2809 if (vect_print_dump_info (REPORT_DETAILS
))
2811 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
2812 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2815 /* Loads should be inserted before the first load. */
2816 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
2817 && STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2818 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
2819 && SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2820 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
2821 else if (is_pattern_stmt_p (stmt_info
))
2822 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2824 si
= gsi_for_stmt (stmt
);
2826 /* Stores should be inserted just before the last store. */
2827 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2828 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
2830 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
2831 si
= gsi_for_stmt (last_store
);
2834 /* Mark the first element of the reduction chain as reduction to properly
2835 transform the node. In the analysis phase only the last element of the
2836 chain is marked as reduction. */
2837 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2838 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
2840 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
2841 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
2844 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
2849 /* Generate vector code for all SLP instances in the loop/basic block. */
2852 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2854 VEC (slp_instance
, heap
) *slp_instances
;
2855 slp_instance instance
;
2857 bool is_store
= false;
2861 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2862 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2866 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2870 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2872 /* Schedule the tree of INSTANCE. */
2873 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
2875 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
)
2876 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2877 fprintf (vect_dump
, "vectorizing stmts using SLP.");
2880 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2882 slp_tree root
= SLP_INSTANCE_TREE (instance
);
2885 gimple_stmt_iterator gsi
;
2887 for (j
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (root
), j
, store
)
2888 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
2890 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
2893 /* Free the attached stmt_vec_info and remove the stmt. */
2894 gsi
= gsi_for_stmt (store
);
2895 gsi_remove (&gsi
, true);
2896 free_stmt_vec_info (store
);
2904 /* Vectorize the basic block. */
2907 vect_slp_transform_bb (basic_block bb
)
2909 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
2910 gimple_stmt_iterator si
;
2912 gcc_assert (bb_vinfo
);
2914 if (vect_print_dump_info (REPORT_DETAILS
))
2915 fprintf (vect_dump
, "SLPing BB\n");
2917 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2919 gimple stmt
= gsi_stmt (si
);
2920 stmt_vec_info stmt_info
;
2922 if (vect_print_dump_info (REPORT_DETAILS
))
2924 fprintf (vect_dump
, "------>SLPing statement: ");
2925 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2928 stmt_info
= vinfo_for_stmt (stmt
);
2929 gcc_assert (stmt_info
);
2931 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2932 if (STMT_SLP_TYPE (stmt_info
))
2934 vect_schedule_slp (NULL
, bb_vinfo
);
2939 mark_sym_for_renaming (gimple_vop (cfun
));
2940 /* The memory tags and pointers in vectorized statements need to
2941 have their SSA forms updated. FIXME, why can't this be delayed
2942 until all the loops have been transformed? */
2943 update_ssa (TODO_update_ssa
);
2945 if (vect_print_dump_info (REPORT_DETAILS
))
2946 fprintf (vect_dump
, "BASIC BLOCK VECTORIZED\n");
2948 destroy_bb_vec_info (bb_vinfo
);