1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
46 find_bb_location (basic_block bb
)
49 gimple_stmt_iterator si
;
54 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
57 if (gimple_location (stmt
) != UNKNOWN_LOC
)
58 return gimple_location (stmt
);
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
68 vect_free_slp_tree (slp_tree node
)
73 if (SLP_TREE_LEFT (node
))
74 vect_free_slp_tree (SLP_TREE_LEFT (node
));
76 if (SLP_TREE_RIGHT (node
))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node
));
79 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
81 if (SLP_TREE_VEC_STMTS (node
))
82 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
88 /* Free the memory allocated for the SLP instance. */
91 vect_free_slp_instance (slp_instance instance
)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
94 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
95 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
105 slp_tree slp_node
, gimple stmt
,
106 VEC (gimple
, heap
) **def_stmts0
,
107 VEC (gimple
, heap
) **def_stmts1
,
108 enum vect_def_type
*first_stmt_dt0
,
109 enum vect_def_type
*first_stmt_dt1
,
110 tree
*first_stmt_def0_type
,
111 tree
*first_stmt_def1_type
,
112 tree
*first_stmt_const_oprnd
,
113 int ncopies_for_cost
,
114 bool *pattern0
, bool *pattern1
)
117 unsigned int i
, number_of_oprnds
;
120 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
121 stmt_vec_info stmt_info
=
122 vinfo_for_stmt (VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0));
123 enum gimple_rhs_class rhs_class
;
124 struct loop
*loop
= NULL
;
127 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
129 rhs_class
= get_gimple_rhs_class (gimple_assign_rhs_code (stmt
));
130 number_of_oprnds
= gimple_num_ops (stmt
) - 1; /* RHS only */
132 for (i
= 0; i
< number_of_oprnds
; i
++)
134 oprnd
= gimple_op (stmt
, i
+ 1);
136 if (!vect_is_simple_use (oprnd
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
138 || (!def_stmt
&& dt
[i
] != vect_constant_def
))
140 if (vect_print_dump_info (REPORT_SLP
))
142 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
152 if (loop
&& def_stmt
&& gimple_bb (def_stmt
)
153 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
154 && vinfo_for_stmt (def_stmt
)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
)))
157 if (!*first_stmt_dt0
)
161 if (i
== 1 && !*first_stmt_dt1
)
163 else if ((i
== 0 && !*pattern0
) || (i
== 1 && !*pattern1
))
165 if (vect_print_dump_info (REPORT_DETAILS
))
167 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
176 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
177 dt
[i
] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
179 if (*dt
== vect_unknown_def_type
)
181 if (vect_print_dump_info (REPORT_DETAILS
))
182 fprintf (vect_dump
, "Unsupported pattern.");
186 switch (gimple_code (def_stmt
))
189 def
= gimple_phi_result (def_stmt
);
193 def
= gimple_assign_lhs (def_stmt
);
197 if (vect_print_dump_info (REPORT_DETAILS
))
198 fprintf (vect_dump
, "unsupported defining stmt: ");
203 if (!*first_stmt_dt0
)
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0
= dt
[i
];
208 *first_stmt_def0_type
= TREE_TYPE (def
);
210 *first_stmt_const_oprnd
= oprnd
;
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class
!= GIMPLE_SINGLE_RHS
)
214 /* Not memory operation (we don't call this functions for loads). */
215 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dt
, slp_node
);
218 vect_model_store_cost (stmt_info
, ncopies_for_cost
, dt
[0], slp_node
);
223 if (!*first_stmt_dt1
&& i
== 1)
225 /* op1 of the first stmt of the group - store its info. */
226 *first_stmt_dt1
= dt
[i
];
228 *first_stmt_def1_type
= TREE_TYPE (def
);
231 /* We assume that the stmt contains only one constant
232 operand. We fail otherwise, to be on the safe side. */
233 if (*first_stmt_const_oprnd
)
235 if (vect_print_dump_info (REPORT_SLP
))
236 fprintf (vect_dump
, "Build SLP failed: two constant "
240 *first_stmt_const_oprnd
= oprnd
;
245 /* Not first stmt of the group, check that the def-stmt/s match
246 the def-stmt/s of the first stmt. */
248 && (*first_stmt_dt0
!= dt
[i
]
249 || (*first_stmt_def0_type
&& def
250 && !types_compatible_p (*first_stmt_def0_type
,
253 && (*first_stmt_dt1
!= dt
[i
]
254 || (*first_stmt_def1_type
&& def
255 && !types_compatible_p (*first_stmt_def1_type
,
258 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd
),
261 if (vect_print_dump_info (REPORT_SLP
))
262 fprintf (vect_dump
, "Build SLP failed: different types ");
269 /* Check the types of the definitions. */
272 case vect_constant_def
:
273 case vect_external_def
:
276 case vect_internal_def
:
277 case vect_reduction_def
:
279 VEC_safe_push (gimple
, heap
, *def_stmts0
, def_stmt
);
281 VEC_safe_push (gimple
, heap
, *def_stmts1
, def_stmt
);
285 /* FORNOW: Not supported. */
286 if (vect_print_dump_info (REPORT_SLP
))
288 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
289 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
300 /* Recursively build an SLP tree starting from NODE.
301 Fail (and return FALSE) if def-stmts are not isomorphic, require data
302 permutation or are of unsupported types of operation. Otherwise, return
306 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
307 slp_tree
*node
, unsigned int group_size
,
308 int *inside_cost
, int *outside_cost
,
309 int ncopies_for_cost
, unsigned int *max_nunits
,
310 VEC (int, heap
) **load_permutation
,
311 VEC (slp_tree
, heap
) **loads
,
312 unsigned int vectorization_factor
)
314 VEC (gimple
, heap
) *def_stmts0
= VEC_alloc (gimple
, heap
, group_size
);
315 VEC (gimple
, heap
) *def_stmts1
= VEC_alloc (gimple
, heap
, group_size
);
317 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
318 gimple stmt
= VEC_index (gimple
, stmts
, 0);
319 enum vect_def_type first_stmt_dt0
= vect_uninitialized_def
;
320 enum vect_def_type first_stmt_dt1
= vect_uninitialized_def
;
321 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
322 tree first_stmt_def1_type
= NULL_TREE
, first_stmt_def0_type
= NULL_TREE
;
324 bool stop_recursion
= false, need_same_oprnds
= false;
325 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
326 unsigned int ncopies
;
329 enum machine_mode optab_op2_mode
;
330 enum machine_mode vec_mode
;
331 tree first_stmt_const_oprnd
= NULL_TREE
;
332 struct data_reference
*first_dr
;
333 bool pattern0
= false, pattern1
= false;
335 bool permutation
= false;
336 unsigned int load_place
;
337 gimple first_load
, prev_first_load
= NULL
;
339 /* For every stmt in NODE find its def stmt/s. */
340 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
342 if (vect_print_dump_info (REPORT_SLP
))
344 fprintf (vect_dump
, "Build SLP for ");
345 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
348 /* Fail to vectorize statements marked as unvectorizable. */
349 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
351 if (vect_print_dump_info (REPORT_SLP
))
354 "Build SLP failed: unvectorizable statement ");
355 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
361 lhs
= gimple_get_lhs (stmt
);
362 if (lhs
== NULL_TREE
)
364 if (vect_print_dump_info (REPORT_SLP
))
367 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
368 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
374 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
375 vectype
= get_vectype_for_scalar_type (scalar_type
);
378 if (vect_print_dump_info (REPORT_SLP
))
380 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
381 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
386 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
389 if (vect_print_dump_info (REPORT_SLP
))
390 fprintf (vect_dump
, "SLP with multiple types ");
392 /* FORNOW: multiple types are unsupported in BB SLP. */
397 /* In case of multiple types we need to detect the smallest type. */
398 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
399 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
401 if (is_gimple_call (stmt
))
402 rhs_code
= CALL_EXPR
;
404 rhs_code
= gimple_assign_rhs_code (stmt
);
406 /* Check the operation. */
409 first_stmt_code
= rhs_code
;
411 /* Shift arguments should be equal in all the packed stmts for a
412 vector shift with scalar shift operand. */
413 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
414 || rhs_code
== LROTATE_EXPR
415 || rhs_code
== RROTATE_EXPR
)
417 vec_mode
= TYPE_MODE (vectype
);
419 /* First see if we have a vector/vector shift. */
420 optab
= optab_for_tree_code (rhs_code
, vectype
,
424 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
426 /* No vector/vector shift, try for a vector/scalar shift. */
427 optab
= optab_for_tree_code (rhs_code
, vectype
,
432 if (vect_print_dump_info (REPORT_SLP
))
433 fprintf (vect_dump
, "Build SLP failed: no optab.");
436 icode
= (int) optab_handler (optab
, vec_mode
);
437 if (icode
== CODE_FOR_nothing
)
439 if (vect_print_dump_info (REPORT_SLP
))
440 fprintf (vect_dump
, "Build SLP failed: "
441 "op not supported by target.");
444 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
445 if (!VECTOR_MODE_P (optab_op2_mode
))
447 need_same_oprnds
= true;
448 first_op1
= gimple_assign_rhs2 (stmt
);
455 if (first_stmt_code
!= rhs_code
456 && (first_stmt_code
!= IMAGPART_EXPR
457 || rhs_code
!= REALPART_EXPR
)
458 && (first_stmt_code
!= REALPART_EXPR
459 || rhs_code
!= IMAGPART_EXPR
)
460 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
))
461 && (first_stmt_code
== ARRAY_REF
462 || first_stmt_code
== INDIRECT_REF
463 || first_stmt_code
== COMPONENT_REF
464 || first_stmt_code
== MEM_REF
)))
466 if (vect_print_dump_info (REPORT_SLP
))
469 "Build SLP failed: different operation in stmt ");
470 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
477 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
479 if (vect_print_dump_info (REPORT_SLP
))
482 "Build SLP failed: different shift arguments in ");
483 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
490 /* Strided store or load. */
491 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
493 if (REFERENCE_CLASS_P (lhs
))
496 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
497 stmt
, &def_stmts0
, &def_stmts1
,
500 &first_stmt_def0_type
,
501 &first_stmt_def1_type
,
502 &first_stmt_const_oprnd
,
504 &pattern0
, &pattern1
))
510 /* FORNOW: Check that there is no gap between the loads. */
511 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
512 && DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
513 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != stmt
514 && DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
516 if (vect_print_dump_info (REPORT_SLP
))
518 fprintf (vect_dump
, "Build SLP failed: strided "
520 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
526 /* Check that the size of interleaved loads group is not
527 greater than the SLP group size. */
528 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
530 if (vect_print_dump_info (REPORT_SLP
))
532 fprintf (vect_dump
, "Build SLP failed: the number of "
533 "interleaved loads is greater than"
534 " the SLP group size ");
535 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
541 first_load
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
));
544 /* Check that there are no loads from different interleaving
545 chains in the same node. The only exception is complex
547 if (prev_first_load
!= first_load
548 && rhs_code
!= REALPART_EXPR
549 && rhs_code
!= IMAGPART_EXPR
)
551 if (vect_print_dump_info (REPORT_SLP
))
553 fprintf (vect_dump
, "Build SLP failed: different "
554 "interleaving chains in one node ");
555 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
562 prev_first_load
= first_load
;
564 if (first_load
== stmt
)
566 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
567 if (vect_supportable_dr_alignment (first_dr
, false)
568 == dr_unaligned_unsupported
)
570 if (vect_print_dump_info (REPORT_SLP
))
572 fprintf (vect_dump
, "Build SLP failed: unsupported "
574 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
580 /* Analyze costs (for the first stmt in the group). */
581 vect_model_load_cost (vinfo_for_stmt (stmt
),
582 ncopies_for_cost
, *node
);
585 /* Store the place of this load in the interleaving chain. In
586 case that permutation is needed we later decide if a specific
587 permutation is supported. */
588 load_place
= vect_get_place_in_interleaving_chain (stmt
,
593 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
595 /* We stop the tree when we reach a group of loads. */
596 stop_recursion
= true;
599 } /* Strided access. */
602 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
604 /* Not strided load. */
605 if (vect_print_dump_info (REPORT_SLP
))
607 fprintf (vect_dump
, "Build SLP failed: not strided load ");
608 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
611 /* FORNOW: Not strided loads are not supported. */
615 /* Not memory operation. */
616 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
617 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
)
619 if (vect_print_dump_info (REPORT_SLP
))
621 fprintf (vect_dump
, "Build SLP failed: operation");
622 fprintf (vect_dump
, " unsupported ");
623 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
629 /* Find the def-stmts. */
630 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
631 &def_stmts0
, &def_stmts1
,
632 &first_stmt_dt0
, &first_stmt_dt1
,
633 &first_stmt_def0_type
,
634 &first_stmt_def1_type
,
635 &first_stmt_const_oprnd
,
637 &pattern0
, &pattern1
))
642 /* Add the costs of the node to the overall instance costs. */
643 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
644 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
646 /* Strided loads were reached - stop the recursion. */
651 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
653 += targetm
.vectorize
.builtin_vectorization_cost (vec_perm
, NULL
, 0)
658 /* We don't check here complex numbers chains, so we keep them in
659 LOADS for further check in vect_supported_load_permutation_p. */
660 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
661 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
667 /* Create SLP_TREE nodes for the definition node/s. */
668 if (first_stmt_dt0
== vect_internal_def
)
670 slp_tree left_node
= XNEW (struct _slp_tree
);
671 SLP_TREE_SCALAR_STMTS (left_node
) = def_stmts0
;
672 SLP_TREE_VEC_STMTS (left_node
) = NULL
;
673 SLP_TREE_LEFT (left_node
) = NULL
;
674 SLP_TREE_RIGHT (left_node
) = NULL
;
675 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node
) = 0;
676 SLP_TREE_INSIDE_OF_LOOP_COST (left_node
) = 0;
677 if (!vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &left_node
, group_size
,
678 inside_cost
, outside_cost
, ncopies_for_cost
,
679 max_nunits
, load_permutation
, loads
,
680 vectorization_factor
))
683 SLP_TREE_LEFT (*node
) = left_node
;
686 if (first_stmt_dt1
== vect_internal_def
)
688 slp_tree right_node
= XNEW (struct _slp_tree
);
689 SLP_TREE_SCALAR_STMTS (right_node
) = def_stmts1
;
690 SLP_TREE_VEC_STMTS (right_node
) = NULL
;
691 SLP_TREE_LEFT (right_node
) = NULL
;
692 SLP_TREE_RIGHT (right_node
) = NULL
;
693 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node
) = 0;
694 SLP_TREE_INSIDE_OF_LOOP_COST (right_node
) = 0;
695 if (!vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &right_node
, group_size
,
696 inside_cost
, outside_cost
, ncopies_for_cost
,
697 max_nunits
, load_permutation
, loads
,
698 vectorization_factor
))
701 SLP_TREE_RIGHT (*node
) = right_node
;
709 vect_print_slp_tree (slp_tree node
)
717 fprintf (vect_dump
, "node ");
718 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
720 fprintf (vect_dump
, "\n\tstmt %d ", i
);
721 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
723 fprintf (vect_dump
, "\n");
725 vect_print_slp_tree (SLP_TREE_LEFT (node
));
726 vect_print_slp_tree (SLP_TREE_RIGHT (node
));
730 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
731 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
732 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
733 stmts in NODE are to be marked. */
736 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
744 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
746 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
748 vect_mark_slp_stmts (SLP_TREE_LEFT (node
), mark
, j
);
749 vect_mark_slp_stmts (SLP_TREE_RIGHT (node
), mark
, j
);
753 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
756 vect_mark_slp_stmts_relevant (slp_tree node
)
760 stmt_vec_info stmt_info
;
765 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
767 stmt_info
= vinfo_for_stmt (stmt
);
768 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
769 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
770 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
773 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node
));
774 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node
));
778 /* Check if the permutation required by the SLP INSTANCE is supported.
779 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
782 vect_supported_slp_permutation_p (slp_instance instance
)
784 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
785 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
786 gimple first_load
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
));
787 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
789 slp_tree
*tmp_loads
= NULL
;
790 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
793 /* FORNOW: The only supported loads permutation is loads from the same
794 location in all the loads in the node, when the data-refs in
795 nodes of LOADS constitute an interleaving chain.
796 Sort the nodes according to the order of accesses in the chain. */
797 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
799 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
800 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
801 i
+= group_size
, j
++)
803 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
804 /* Check that the loads are all in the same interleaving chain. */
805 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt
)) != first_load
)
807 if (vect_print_dump_info (REPORT_DETAILS
))
809 fprintf (vect_dump
, "Build SLP failed: unsupported data "
811 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
818 tmp_loads
[index
] = load
;
821 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
822 for (i
= 0; i
< group_size
; i
++)
823 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
825 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
826 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
829 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
830 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
838 /* Rearrange the statements of NODE according to PERMUTATION. */
841 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
842 VEC (int, heap
) *permutation
)
845 VEC (gimple
, heap
) *tmp_stmts
;
846 unsigned int index
, i
;
851 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node
), group_size
, permutation
);
852 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node
), group_size
, permutation
);
854 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
855 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
857 for (i
= 0; i
< group_size
; i
++)
858 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
860 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
862 index
= VEC_index (int, permutation
, i
);
863 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
866 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
867 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
871 /* Check if the required load permutation is supported.
872 LOAD_PERMUTATION contains a list of indices of the loads.
873 In SLP this permutation is relative to the order of strided stores that are
874 the base of the SLP instance. */
877 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
878 VEC (int, heap
) *load_permutation
)
880 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
881 bool supported
, bad_permutation
= false;
883 slp_tree node
, other_complex_node
;
884 gimple stmt
, first
= NULL
, other_node_first
;
885 unsigned complex_numbers
= 0;
887 /* FORNOW: permutations are only supported in SLP. */
891 if (vect_print_dump_info (REPORT_SLP
))
893 fprintf (vect_dump
, "Load permutation ");
894 FOR_EACH_VEC_ELT (int, load_permutation
, i
, next
)
895 fprintf (vect_dump
, "%d ", next
);
898 /* In case of reduction every load permutation is allowed, since the order
899 of the reduction statements is not important (as opposed to the case of
900 strided stores). The only condition we need to check is that all the
901 load nodes are of the same size and have the same permutation (and then
902 rearrange all the nodes of the SLP instance according to this
905 /* Check that all the load nodes are of the same size. */
906 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
908 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
909 != (unsigned) group_size
)
912 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
913 if (is_gimple_assign (stmt
)
914 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
915 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
919 /* Complex operands can be swapped as following:
920 real_c = real_b + real_a;
921 imag_c = imag_a + imag_b;
922 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
923 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
924 chains are mixed, they match the above pattern. */
927 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
929 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, stmt
)
935 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != first
)
937 if (complex_numbers
!= 2)
945 other_complex_node
= VEC_index (slp_tree
,
946 SLP_INSTANCE_LOADS (slp_instn
), k
);
947 other_node_first
= VEC_index (gimple
,
948 SLP_TREE_SCALAR_STMTS (other_complex_node
), 0);
950 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
))
959 /* We checked that this case ok, so there is no need to proceed with
960 permutation tests. */
961 if (complex_numbers
== 2)
963 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (slp_instn
));
964 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
968 node
= SLP_INSTANCE_TREE (slp_instn
);
969 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
970 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
971 instance, not all the loads belong to the same node or interleaving
972 group. Hence, we need to divide them into groups according to
974 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
976 /* Reduction (there are no data-refs in the root). */
977 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
979 int first_group_load_index
;
981 /* Compare all the permutation sequences to the first one. */
982 for (i
= 1; i
< number_of_groups
; i
++)
985 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
987 next
= VEC_index (int, load_permutation
, j
);
988 first_group_load_index
= VEC_index (int, load_permutation
, k
);
990 if (next
!= first_group_load_index
)
992 bad_permutation
= true;
1003 if (!bad_permutation
)
1005 /* This permutaion is valid for reduction. Since the order of the
1006 statements in the nodes is not important unless they are memory
1007 accesses, we can rearrange the statements in all the nodes
1008 according to the order of the loads. */
1009 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1011 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1016 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1017 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1018 well (unless it's reduction). */
1019 if (VEC_length (int, load_permutation
)
1020 != (unsigned int) (group_size
* group_size
))
1024 load_index
= sbitmap_alloc (group_size
);
1025 sbitmap_zero (load_index
);
1026 for (j
= 0; j
< group_size
; j
++)
1028 for (i
= j
* group_size
, k
= 0;
1029 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
1032 if (i
!= j
* group_size
&& next
!= prev
)
1041 if (TEST_BIT (load_index
, prev
))
1047 SET_BIT (load_index
, prev
);
1050 for (j
= 0; j
< group_size
; j
++)
1051 if (!TEST_BIT (load_index
, j
))
1054 sbitmap_free (load_index
);
1056 if (supported
&& i
== group_size
* group_size
1057 && vect_supported_slp_permutation_p (slp_instn
))
1064 /* Find the first load in the loop that belongs to INSTANCE.
1065 When loads are in several SLP nodes, there can be a case in which the first
1066 load does not appear in the first SLP node to be transformed, causing
1067 incorrect order of statements. Since we generate all the loads together,
1068 they must be inserted before the first load of the SLP instance and not
1069 before the first load of the first node of the instance. */
1072 vect_find_first_load_in_slp_instance (slp_instance instance
)
1076 gimple first_load
= NULL
, load
;
1078 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1079 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1080 first_load
= get_earlier_stmt (load
, first_load
);
1086 /* Find the last store in SLP INSTANCE. */
1089 vect_find_last_store_in_slp_instance (slp_instance instance
)
1093 gimple last_store
= NULL
, store
;
1095 node
= SLP_INSTANCE_TREE (instance
);
1097 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, store
);
1099 last_store
= get_later_stmt (store
, last_store
);
1105 /* Analyze an SLP instance starting from a group of strided stores. Call
1106 vect_build_slp_tree to build a tree of packed stmts if possible.
1107 Return FALSE if it's impossible to SLP any stmt in the loop. */
1110 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1113 slp_instance new_instance
;
1114 slp_tree node
= XNEW (struct _slp_tree
);
1115 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
1116 unsigned int unrolling_factor
= 1, nunits
;
1117 tree vectype
, scalar_type
= NULL_TREE
;
1119 unsigned int vectorization_factor
= 0;
1120 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1121 unsigned int max_nunits
= 0;
1122 VEC (int, heap
) *load_permutation
;
1123 VEC (slp_tree
, heap
) *loads
;
1124 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1128 scalar_type
= TREE_TYPE (DR_REF (dr
));
1129 vectype
= get_vectype_for_scalar_type (scalar_type
);
1130 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
1134 gcc_assert (loop_vinfo
);
1135 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1136 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1141 if (vect_print_dump_info (REPORT_SLP
))
1143 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1144 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1150 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1152 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1154 /* No multitypes in BB SLP. */
1155 vectorization_factor
= nunits
;
1157 /* Calculate the unrolling factor. */
1158 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1159 if (unrolling_factor
!= 1 && !loop_vinfo
)
1161 if (vect_print_dump_info (REPORT_SLP
))
1162 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1168 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1169 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (gimple
, heap
, group_size
);
1173 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1176 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1177 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1182 /* Collect reduction statements. */
1183 for (i
= 0; VEC_iterate (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
,
1187 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1188 if (vect_print_dump_info (REPORT_DETAILS
))
1190 fprintf (vect_dump
, "pushing reduction into node: ");
1191 print_gimple_stmt (vect_dump
, next
, 0, TDF_SLIM
);
1196 SLP_TREE_VEC_STMTS (node
) = NULL
;
1197 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
1198 SLP_TREE_LEFT (node
) = NULL
;
1199 SLP_TREE_RIGHT (node
) = NULL
;
1200 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
1201 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
1203 /* Calculate the number of vector stmts to create based on the unrolling
1204 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1205 GROUP_SIZE / NUNITS otherwise. */
1206 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1208 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1209 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1211 /* Build the tree for the SLP instance. */
1212 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1213 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1214 &max_nunits
, &load_permutation
, &loads
,
1215 vectorization_factor
))
1217 /* Create a new SLP instance. */
1218 new_instance
= XNEW (struct _slp_instance
);
1219 SLP_INSTANCE_TREE (new_instance
) = node
;
1220 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1221 /* Calculate the unrolling factor based on the smallest type in the
1223 if (max_nunits
> nunits
)
1224 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1227 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1228 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1229 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1230 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1231 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1232 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1233 if (VEC_length (slp_tree
, loads
))
1235 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1238 if (vect_print_dump_info (REPORT_SLP
))
1240 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1242 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1245 vect_free_slp_instance (new_instance
);
1249 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1250 = vect_find_first_load_in_slp_instance (new_instance
);
1253 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1256 VEC_safe_push (slp_instance
, heap
,
1257 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1260 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1263 if (vect_print_dump_info (REPORT_SLP
))
1264 vect_print_slp_tree (node
);
1269 /* Failed to SLP. */
1270 /* Free the allocated memory. */
1271 vect_free_slp_tree (node
);
1272 VEC_free (int, heap
, load_permutation
);
1273 VEC_free (slp_tree
, heap
, loads
);
1279 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1280 trees of packed scalar stmts if SLP is possible. */
1283 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1286 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
;
1290 if (vect_print_dump_info (REPORT_SLP
))
1291 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1295 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1296 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1299 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1301 /* Find SLP sequences starting from groups of strided stores. */
1302 FOR_EACH_VEC_ELT (gimple
, strided_stores
, i
, store
)
1303 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, store
))
1306 if (bb_vinfo
&& !ok
)
1308 if (vect_print_dump_info (REPORT_SLP
))
1309 fprintf (vect_dump
, "Failed to SLP the basic block.");
1314 /* Find SLP sequences starting from groups of reductions. */
1315 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1316 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1317 VEC_index (gimple
, reductions
, 0)))
1324 /* For each possible SLP instance decide whether to SLP it and calculate overall
1325 unrolling factor needed to SLP the loop. */
1328 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1330 unsigned int i
, unrolling_factor
= 1;
1331 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1332 slp_instance instance
;
1333 int decided_to_slp
= 0;
1335 if (vect_print_dump_info (REPORT_SLP
))
1336 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1338 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1340 /* FORNOW: SLP if you can. */
1341 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1342 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1344 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1345 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1346 loop-based vectorization. Such stmts will be marked as HYBRID. */
1347 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1351 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1353 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1354 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1355 decided_to_slp
, unrolling_factor
);
1359 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1360 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1363 vect_detect_hybrid_slp_stmts (slp_tree node
)
1367 imm_use_iterator imm_iter
;
1369 stmt_vec_info stmt_vinfo
;
1374 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1375 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1376 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1377 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1378 if ((stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1379 && !STMT_SLP_TYPE (stmt_vinfo
)
1380 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1381 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1382 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1383 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt
))
1384 == vect_reduction_def
))
1385 vect_mark_slp_stmts (node
, hybrid
, i
);
1387 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node
));
1388 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node
));
1392 /* Find stmts that must be both vectorized and SLPed. */
1395 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1398 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1399 slp_instance instance
;
1401 if (vect_print_dump_info (REPORT_SLP
))
1402 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1404 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1405 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1409 /* Create and initialize a new bb_vec_info struct for BB, as well as
1410 stmt_vec_info structs for all the stmts in it. */
1413 new_bb_vec_info (basic_block bb
)
1415 bb_vec_info res
= NULL
;
1416 gimple_stmt_iterator gsi
;
1418 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1419 BB_VINFO_BB (res
) = bb
;
1421 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1423 gimple stmt
= gsi_stmt (gsi
);
1424 gimple_set_uid (stmt
, 0);
1425 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1428 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1429 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1436 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1437 stmts in the basic block. */
1440 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1443 gimple_stmt_iterator si
;
1448 bb
= BB_VINFO_BB (bb_vinfo
);
1450 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1452 gimple stmt
= gsi_stmt (si
);
1453 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1456 /* Free stmt_vec_info. */
1457 free_stmt_vec_info (stmt
);
1460 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1461 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1467 /* Analyze statements contained in SLP tree node after recursively analyzing
1468 the subtree. Return TRUE if the operations are supported. */
1471 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1480 if (!vect_slp_analyze_node_operations (bb_vinfo
, SLP_TREE_LEFT (node
))
1481 || !vect_slp_analyze_node_operations (bb_vinfo
, SLP_TREE_RIGHT (node
)))
1484 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1486 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1487 gcc_assert (stmt_info
);
1488 gcc_assert (PURE_SLP_STMT (stmt_info
));
1490 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1498 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1499 operations are supported. */
1502 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1504 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1505 slp_instance instance
;
1508 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1510 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1511 SLP_INSTANCE_TREE (instance
)))
1513 vect_free_slp_instance (instance
);
1514 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1520 if (!VEC_length (slp_instance
, slp_instances
))
1526 /* Check if loads and stores are mixed in the basic block (in that
1527 case if we are not sure that the accesses differ, we can't vectorize the
1528 basic block). Also return FALSE in case that there is statement marked as
1529 not vectorizable. */
1532 vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo
)
1534 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
1535 gimple_stmt_iterator si
;
1536 bool detected_store
= false;
1538 struct data_reference
*dr
;
1540 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1542 stmt
= gsi_stmt (si
);
1544 /* We can't allow not analyzed statements, since they may contain data
1546 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
1549 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
1552 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1553 if (DR_IS_READ (dr
) && detected_store
)
1556 if (!DR_IS_READ (dr
))
1557 detected_store
= true;
1563 /* Check if vectorization of the basic block is profitable. */
1566 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
1568 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1569 slp_instance instance
;
1571 unsigned int vec_outside_cost
= 0, vec_inside_cost
= 0, scalar_cost
= 0;
1572 unsigned int stmt_cost
;
1574 gimple_stmt_iterator si
;
1575 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
1576 stmt_vec_info stmt_info
= NULL
;
1577 tree dummy_type
= NULL
;
1580 /* Calculate vector costs. */
1581 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1583 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
1584 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
1587 /* Calculate scalar cost. */
1588 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1590 stmt
= gsi_stmt (si
);
1591 stmt_info
= vinfo_for_stmt (stmt
);
1593 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
)
1594 || !PURE_SLP_STMT (stmt_info
))
1597 if (STMT_VINFO_DATA_REF (stmt_info
))
1599 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1600 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1601 (scalar_load
, dummy_type
, dummy
);
1603 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1604 (scalar_store
, dummy_type
, dummy
);
1607 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1608 (scalar_stmt
, dummy_type
, dummy
);
1610 scalar_cost
+= stmt_cost
;
1613 if (vect_print_dump_info (REPORT_COST
))
1615 fprintf (vect_dump
, "Cost model analysis: \n");
1616 fprintf (vect_dump
, " Vector inside of basic block cost: %d\n",
1618 fprintf (vect_dump
, " Vector outside of basic block cost: %d\n",
1620 fprintf (vect_dump
, " Scalar cost of basic block: %d", scalar_cost
);
1623 /* Vectorization is profitable if its cost is less than the cost of scalar
1625 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
1631 /* Check if the basic block can be vectorized. */
1634 vect_slp_analyze_bb (basic_block bb
)
1636 bb_vec_info bb_vinfo
;
1637 VEC (ddr_p
, heap
) *ddrs
;
1638 VEC (slp_instance
, heap
) *slp_instances
;
1639 slp_instance instance
;
1641 gimple_stmt_iterator gsi
;
1643 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1644 bool data_dependence_in_bb
= false;
1646 current_vector_size
= 0;
1648 if (vect_print_dump_info (REPORT_DETAILS
))
1649 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
1651 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1653 gimple stmt
= gsi_stmt (gsi
);
1654 if (!is_gimple_debug (stmt
)
1655 && !gimple_nop_p (stmt
)
1656 && gimple_code (stmt
) != GIMPLE_LABEL
)
1660 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
1662 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1663 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
1669 bb_vinfo
= new_bb_vec_info (bb
);
1673 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
1675 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1676 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
1679 destroy_bb_vec_info (bb_vinfo
);
1683 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
1684 if (!VEC_length (ddr_p
, ddrs
))
1686 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1687 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
1690 destroy_bb_vec_info (bb_vinfo
);
1694 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
,
1695 &data_dependence_in_bb
)
1697 || (data_dependence_in_bb
1698 && !vect_bb_vectorizable_with_dependencies (bb_vinfo
)))
1700 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1701 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
1702 "in basic block.\n");
1704 destroy_bb_vec_info (bb_vinfo
);
1708 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
1710 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1711 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
1714 destroy_bb_vec_info (bb_vinfo
);
1718 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
1720 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1721 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
1724 destroy_bb_vec_info (bb_vinfo
);
1728 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
1730 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1731 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
1734 destroy_bb_vec_info (bb_vinfo
);
1738 /* Check the SLP opportunities in the basic block, analyze and build SLP
1740 if (!vect_analyze_slp (NULL
, bb_vinfo
))
1742 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1743 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
1744 "in basic block.\n");
1746 destroy_bb_vec_info (bb_vinfo
);
1750 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1752 /* Mark all the statements that we want to vectorize as pure SLP and
1754 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1756 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1757 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
1760 if (!vect_slp_analyze_operations (bb_vinfo
))
1762 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1763 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
1765 destroy_bb_vec_info (bb_vinfo
);
1769 /* Cost model: check if the vectorization is worthwhile. */
1770 if (flag_vect_cost_model
1771 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
1773 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1774 fprintf (vect_dump
, "not vectorized: vectorization is not "
1777 destroy_bb_vec_info (bb_vinfo
);
1781 if (vect_print_dump_info (REPORT_DETAILS
))
1782 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
1788 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1789 the number of created vector stmts depends on the unrolling factor).
1790 However, the actual number of vector stmts for every SLP node depends on
1791 VF which is set later in vect_analyze_operations (). Hence, SLP costs
1792 should be updated. In this function we assume that the inside costs
1793 calculated in vect_model_xxx_cost are linear in ncopies. */
1796 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
1798 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1799 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1800 slp_instance instance
;
1802 if (vect_print_dump_info (REPORT_SLP
))
1803 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
1805 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1806 /* We assume that costs are linear in ncopies. */
1807 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
1808 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1812 /* For constant and loop invariant defs of SLP_NODE this function returns
1813 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1814 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
1815 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1816 REDUC_INDEX is the index of the reduction operand in the statements, unless
1820 vect_get_constant_vectors (slp_tree slp_node
, VEC(tree
,heap
) **vec_oprnds
,
1821 unsigned int op_num
, unsigned int number_of_vectors
,
1824 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
1825 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1826 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1830 int j
, number_of_places_left_in_vector
;
1833 int group_size
= VEC_length (gimple
, stmts
);
1834 unsigned int vec_num
, i
;
1835 int number_of_copies
= 1;
1836 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
1837 bool constant_p
, is_store
;
1838 tree neutral_op
= NULL
;
1839 enum tree_code code
= gimple_assign_rhs_code (stmt
);
1841 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
1843 if (reduc_index
== -1)
1845 VEC_free (tree
, heap
, *vec_oprnds
);
1849 op_num
= reduc_index
- 1;
1850 op
= gimple_op (stmt
, op_num
+ 1);
1851 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1852 we need either neutral operands or the original operands. See
1853 get_initial_def_for_reduction() for details. */
1856 case WIDEN_SUM_EXPR
:
1862 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
1863 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
1865 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
1870 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
1871 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
1873 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
1878 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
1886 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
1889 op
= gimple_assign_rhs1 (stmt
);
1894 op
= gimple_op (stmt
, op_num
+ 1);
1897 if (CONSTANT_CLASS_P (op
))
1902 /* For POINTER_PLUS_EXPR we use the type of the constant/invariant itself.
1903 If OP is the first operand of POINTER_PLUS_EXPR, its type is the type of
1904 the statement, so it's OK to use OP's type for both first and second
1906 if (code
== POINTER_PLUS_EXPR
)
1907 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1909 vector_type
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1911 gcc_assert (vector_type
);
1912 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1914 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1915 created vectors. It is greater than 1 if unrolling is performed.
1917 For example, we have two scalar operands, s1 and s2 (e.g., group of
1918 strided accesses of size two), while NUNITS is four (i.e., four scalars
1919 of this type can be packed in a vector). The output vector will contain
1920 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1923 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1924 containing the operands.
1926 For example, NUNITS is four as before, and the group size is 8
1927 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1928 {s5, s6, s7, s8}. */
1930 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
1932 number_of_places_left_in_vector
= nunits
;
1933 for (j
= 0; j
< number_of_copies
; j
++)
1935 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
1938 op
= gimple_assign_rhs1 (stmt
);
1940 op
= gimple_op (stmt
, op_num
+ 1);
1942 if (reduc_index
!= -1)
1944 struct loop
*loop
= (gimple_bb (stmt
))->loop_father
;
1945 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
1948 /* Get the def before the loop. */
1949 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
1950 loop_preheader_edge (loop
));
1951 if (j
!= (number_of_copies
- 1) && neutral_op
)
1955 /* Create 'vect_ = {op0,op1,...,opn}'. */
1956 t
= tree_cons (NULL_TREE
, op
, t
);
1958 number_of_places_left_in_vector
--;
1960 if (number_of_places_left_in_vector
== 0)
1962 number_of_places_left_in_vector
= nunits
;
1965 vec_cst
= build_vector (vector_type
, t
);
1967 vec_cst
= build_constructor_from_list (vector_type
, t
);
1968 VEC_quick_push (tree
, voprnds
,
1969 vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
));
1975 /* Since the vectors are created in the reverse order, we should invert
1977 vec_num
= VEC_length (tree
, voprnds
);
1978 for (j
= vec_num
- 1; j
>= 0; j
--)
1980 vop
= VEC_index (tree
, voprnds
, j
);
1981 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1984 VEC_free (tree
, heap
, voprnds
);
1986 /* In case that VF is greater than the unrolling factor needed for the SLP
1987 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1988 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1989 to replicate the vectors. */
1990 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
1992 tree neutral_vec
= NULL
;
1999 for (i
= 0; i
< (unsigned) nunits
; i
++)
2000 t
= tree_cons (NULL_TREE
, neutral_op
, t
);
2001 neutral_vec
= build_vector (vector_type
, t
);
2004 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
2008 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
2009 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2015 /* Get vectorized definitions from SLP_NODE that contains corresponding
2016 vectorized def-stmts. */
2019 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
2022 gimple vec_def_stmt
;
2025 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
2027 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2029 gcc_assert (vec_def_stmt
);
2030 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2031 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2036 /* Get vectorized definitions for SLP_NODE.
2037 If the scalar definitions are loop invariants or constants, collect them and
2038 call vect_get_constant_vectors() to create vector stmts.
2039 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2040 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
2041 vect_get_slp_vect_defs() to retrieve them.
2042 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
2043 the right node. This is used when the second operand must remain scalar. */
2046 vect_get_slp_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds0
,
2047 VEC (tree
,heap
) **vec_oprnds1
, int reduc_index
)
2050 enum tree_code code
;
2051 int number_of_vects
;
2052 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2054 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
2055 /* The number of vector defs is determined by the number of vector statements
2056 in the node from which we get those statements. */
2057 if (SLP_TREE_LEFT (slp_node
))
2058 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node
));
2061 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2062 /* Number of vector stmts was calculated according to LHS in
2063 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
2064 necessary. See vect_get_smallest_scalar_type () for details. */
2065 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2067 if (rhs_size_unit
!= lhs_size_unit
)
2069 number_of_vects
*= rhs_size_unit
;
2070 number_of_vects
/= lhs_size_unit
;
2074 /* Allocate memory for vectorized defs. */
2075 *vec_oprnds0
= VEC_alloc (tree
, heap
, number_of_vects
);
2077 /* SLP_NODE corresponds either to a group of stores or to a group of
2078 unary/binary operations. We don't call this function for loads.
2079 For reduction defs we call vect_get_constant_vectors(), since we are
2080 looking for initial loop invariant values. */
2081 if (SLP_TREE_LEFT (slp_node
) && reduc_index
== -1)
2082 /* The defs are already vectorized. */
2083 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node
), vec_oprnds0
);
2085 /* Build vectors from scalar defs. */
2086 vect_get_constant_vectors (slp_node
, vec_oprnds0
, 0, number_of_vects
,
2089 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
)))
2090 /* Since we don't call this function with loads, this is a group of
2094 /* For reductions, we only need initial values. */
2095 if (reduc_index
!= -1)
2098 code
= gimple_assign_rhs_code (first_stmt
);
2099 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
|| !vec_oprnds1
)
2102 /* The number of vector defs is determined by the number of vector statements
2103 in the node from which we get those statements. */
2104 if (SLP_TREE_RIGHT (slp_node
))
2105 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node
));
2107 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2109 *vec_oprnds1
= VEC_alloc (tree
, heap
, number_of_vects
);
2111 if (SLP_TREE_RIGHT (slp_node
))
2112 /* The defs are already vectorized. */
2113 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node
), vec_oprnds1
);
2115 /* Build vectors from scalar defs. */
2116 vect_get_constant_vectors (slp_node
, vec_oprnds1
, 1, number_of_vects
, -1);
2120 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2121 building a vector of type MASK_TYPE from it) and two input vectors placed in
2122 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2123 shifting by STRIDE elements of DR_CHAIN for every copy.
2124 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2126 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2127 the created stmts must be inserted. */
static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
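/* Illustrative shape of the emitted gimple (added comment): each iteration
   of the loop above produces a three-argument call of the form

     vect_perm_N = BUILTIN_DECL (first_vec, second_vec, mask);

   so NCOPIES such calls are generated, each pair of input vectors taken
   STRIDE entries further along DR_CHAIN.  */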
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;
  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }
  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
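/* Illustrative trace (added comment), using the example from
   vect_transform_slp_perm_load below: with MASK_NUNITS == 4, the raw third
   mask for the a's is {6,9,9,9}.  When element 9 (>= 2 * MASK_NUNITS)
   arrives, the already-stored 6 is shifted down by MASK_NUNITS to 2 and 9
   itself becomes 5, so the final mask is {2,5,5,5}.  */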
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
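  /* Illustrative numbers (added comment): with V4SI data and a byte-element
     permute-control vector such as Altivec's, NUNITS == 4 and
     MASK_NUNITS == 16, so SCALE == 4 and every vector element is encoded
     by four consecutive byte mask elements.  */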
  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
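  /* Worked trace of the example above (added comment): for the a's node
     I == 0, GROUP_SIZE == 3 and, with SCALE == 1 and UNROLL_FACTOR == 4,
     the J/K loops below push FIRST_MASK_ELEMENT == (0 + J * 3) three times
     for each J = 0..3, i.e. the element stream 0,0,0 3,3,3 6,6,6 9,9,9,
     which fills exactly the masks {0,0,0,3} {3,3,6,6} {6,9,9,9} before the
     last one is fixed up to {2,5,5,5}.  */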
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;
      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                   mask_nunits, only_one_vec, index, mask,
                                   &current_mask_element, &need_next_vector,
                                   &number_of_mask_fixes, &mask_fixed,
                                   &needs_first_vector))
                    return false;

                  mask[index++] = current_mask_element;
                  if (index == mask_nunits)
                    {
                      tree mask_vec = NULL;

                      while (--index >= 0)
                        {
                          tree t = build_int_cst (mask_element_type,
                                                  mask[index]);
                          mask_vec = tree_cons (NULL, t, mask_vec);
                        }
                      mask_vec = build_vector (mask_type, mask_vec);
                      index = 0;

                      if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                                  mask_vec))
                        {
                          if (vect_print_dump_info (REPORT_DETAILS))
                            {
                              fprintf (vect_dump, "unsupported vect permute ");
                              print_generic_expr (vect_dump, mask_vec, 0);
                            }
                          free (mask);
                          return false;
                        }
                      if (!analyze_only)
                        {
                          if (need_next_vector)
                            {
                              first_vec_index = second_vec_index;
                              second_vec_index = vec_index;
                            }

                          next_scalar_stmt = VEC_index (gimple,
                                     SLP_TREE_SCALAR_STMTS (node),
                                     scalar_index++);

                          vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                   mask_vec, first_vec_index, second_vec_index,
                                   gsi, node, builtin_decl, vectype, dr_chain,
                                   ncopies, vect_stmts_counter++);
                        }
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}
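/* Illustrative use (added sketch; the real call sites elsewhere in the
   vectorizer may differ): the analysis phase can invoke this with
   ANALYZE_ONLY == true and no DR_CHAIN/GSI to verify up front that the
   target supports every required mask, e.g.

     if (!vect_transform_slp_perm_load (stmt, NULL, NULL, (int) vf,
                                        slp_node_instance, true))
       return false;  /+ reject this SLP instance +/

   (comment delimiters shown as /+ +/ to keep this block well-formed),
   while the transform phase passes the vectorized loads in DR_CHAIN and
   ANALYZE_ONLY == false to actually emit the permutations.  */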
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
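  /* Illustrative arithmetic (added comment): with VECTORIZATION_FACTOR == 4,
     GROUP_SIZE == 2 and V4SI vectors (NUNITS == 4), each node gets
     4 * 2 / 4 == 2 vector stmts.  */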
  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}
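/* Note (added comment): the second loop above only removes the group's
   scalar stores, i.e. the statements that carry a data reference; the now
   dead scalar loads and intermediate computations are expected to be
   cleaned up by later dead-code elimination.  */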
/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }
  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}