/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
45 find_bb_location (basic_block bb
)
48 gimple_stmt_iterator si
;
53 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
56 if (gimple_location (stmt
) != UNKNOWN_LOC
)
57 return gimple_location (stmt
);
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 vect_free_slp_tree (slp_tree node
)
72 if (SLP_TREE_LEFT (node
))
73 vect_free_slp_tree (SLP_TREE_LEFT (node
));
75 if (SLP_TREE_RIGHT (node
))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node
));
78 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
80 if (SLP_TREE_VEC_STMTS (node
))
81 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
87 /* Free the memory allocated for the SLP instance. */
90 vect_free_slp_instance (slp_instance instance
)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
93 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
94 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
104 slp_tree slp_node
, gimple stmt
,
105 VEC (gimple
, heap
) **def_stmts0
,
106 VEC (gimple
, heap
) **def_stmts1
,
107 enum vect_def_type
*first_stmt_dt0
,
108 enum vect_def_type
*first_stmt_dt1
,
109 tree
*first_stmt_def0_type
,
110 tree
*first_stmt_def1_type
,
111 tree
*first_stmt_const_oprnd
,
112 int ncopies_for_cost
,
113 bool *pattern0
, bool *pattern1
)
116 unsigned int i
, number_of_oprnds
;
119 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
120 stmt_vec_info stmt_info
=
121 vinfo_for_stmt (VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0));
122 enum gimple_rhs_class rhs_class
;
123 struct loop
*loop
= NULL
;
126 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
128 rhs_class
= get_gimple_rhs_class (gimple_assign_rhs_code (stmt
));
129 number_of_oprnds
= gimple_num_ops (stmt
) - 1; /* RHS only */
131 for (i
= 0; i
< number_of_oprnds
; i
++)
133 oprnd
= gimple_op (stmt
, i
+ 1);
135 if (!vect_is_simple_use (oprnd
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
137 || (!def_stmt
&& dt
[i
] != vect_constant_def
))
139 if (vect_print_dump_info (REPORT_SLP
))
141 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
151 if (loop
&& def_stmt
&& gimple_bb (def_stmt
)
152 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
153 && vinfo_for_stmt (def_stmt
)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
)))
156 if (!*first_stmt_dt0
)
160 if (i
== 1 && !*first_stmt_dt1
)
162 else if ((i
== 0 && !*pattern0
) || (i
== 1 && !*pattern1
))
164 if (vect_print_dump_info (REPORT_DETAILS
))
166 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
175 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
176 dt
[i
] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
178 if (*dt
== vect_unknown_def_type
)
180 if (vect_print_dump_info (REPORT_DETAILS
))
181 fprintf (vect_dump
, "Unsupported pattern.");
185 switch (gimple_code (def_stmt
))
188 def
= gimple_phi_result (def_stmt
);
192 def
= gimple_assign_lhs (def_stmt
);
196 if (vect_print_dump_info (REPORT_DETAILS
))
197 fprintf (vect_dump
, "unsupported defining stmt: ");
202 if (!*first_stmt_dt0
)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0
= dt
[i
];
207 *first_stmt_def0_type
= TREE_TYPE (def
);
209 *first_stmt_const_oprnd
= oprnd
;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class
!= GIMPLE_SINGLE_RHS
)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dt
, slp_node
);
217 vect_model_store_cost (stmt_info
, ncopies_for_cost
, dt
[0], slp_node
);
222 if (!*first_stmt_dt1
&& i
== 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1
= dt
[i
];
227 *first_stmt_def1_type
= TREE_TYPE (def
);
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd
)
234 if (vect_print_dump_info (REPORT_SLP
))
235 fprintf (vect_dump
, "Build SLP failed: two constant "
239 *first_stmt_const_oprnd
= oprnd
;
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
247 && (*first_stmt_dt0
!= dt
[i
]
248 || (*first_stmt_def0_type
&& def
249 && !types_compatible_p (*first_stmt_def0_type
,
252 && (*first_stmt_dt1
!= dt
[i
]
253 || (*first_stmt_def1_type
&& def
254 && !types_compatible_p (*first_stmt_def1_type
,
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd
),
260 if (vect_print_dump_info (REPORT_SLP
))
261 fprintf (vect_dump
, "Build SLP failed: different types ");
268 /* Check the types of the definitions. */
271 case vect_constant_def
:
272 case vect_external_def
:
275 case vect_internal_def
:
276 case vect_reduction_def
:
278 VEC_safe_push (gimple
, heap
, *def_stmts0
, def_stmt
);
280 VEC_safe_push (gimple
, heap
, *def_stmts1
, def_stmt
);
284 /* FORNOW: Not supported. */
285 if (vect_print_dump_info (REPORT_SLP
))
287 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
288 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
299 /* Recursively build an SLP tree starting from NODE.
300 Fail (and return FALSE) if def-stmts are not isomorphic, require data
301 permutation or are of unsupported types of operation. Otherwise, return
305 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
306 slp_tree
*node
, unsigned int group_size
,
307 int *inside_cost
, int *outside_cost
,
308 int ncopies_for_cost
, unsigned int *max_nunits
,
309 VEC (int, heap
) **load_permutation
,
310 VEC (slp_tree
, heap
) **loads
,
311 unsigned int vectorization_factor
)
313 VEC (gimple
, heap
) *def_stmts0
= VEC_alloc (gimple
, heap
, group_size
);
314 VEC (gimple
, heap
) *def_stmts1
= VEC_alloc (gimple
, heap
, group_size
);
316 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
317 gimple stmt
= VEC_index (gimple
, stmts
, 0);
318 enum vect_def_type first_stmt_dt0
= vect_uninitialized_def
;
319 enum vect_def_type first_stmt_dt1
= vect_uninitialized_def
;
320 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
;
321 tree first_stmt_def1_type
= NULL_TREE
, first_stmt_def0_type
= NULL_TREE
;
323 bool stop_recursion
= false, need_same_oprnds
= false;
324 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
325 unsigned int ncopies
;
328 enum machine_mode optab_op2_mode
;
329 enum machine_mode vec_mode
;
330 tree first_stmt_const_oprnd
= NULL_TREE
;
331 struct data_reference
*first_dr
;
332 bool pattern0
= false, pattern1
= false;
334 bool permutation
= false;
335 unsigned int load_place
;
336 gimple first_load
, prev_first_load
= NULL
;
338 /* For every stmt in NODE find its def stmt/s. */
339 for (i
= 0; VEC_iterate (gimple
, stmts
, i
, stmt
); i
++)
341 if (vect_print_dump_info (REPORT_SLP
))
343 fprintf (vect_dump
, "Build SLP for ");
344 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
347 /* Fail to vectorize statements marked as unvectorizable. */
348 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
350 if (vect_print_dump_info (REPORT_SLP
))
353 "Build SLP failed: unvectorizable statement ");
354 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
360 lhs
= gimple_get_lhs (stmt
);
361 if (lhs
== NULL_TREE
)
363 if (vect_print_dump_info (REPORT_SLP
))
366 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
367 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
373 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
374 vectype
= get_vectype_for_scalar_type (scalar_type
);
377 if (vect_print_dump_info (REPORT_SLP
))
379 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
380 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
385 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
388 if (vect_print_dump_info (REPORT_SLP
))
389 fprintf (vect_dump
, "SLP with multiple types ");
391 /* FORNOW: multiple types are unsupported in BB SLP. */
396 /* In case of multiple types we need to detect the smallest type. */
397 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
398 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
400 if (is_gimple_call (stmt
))
401 rhs_code
= CALL_EXPR
;
403 rhs_code
= gimple_assign_rhs_code (stmt
);
405 /* Check the operation. */
408 first_stmt_code
= rhs_code
;
410 /* Shift arguments should be equal in all the packed stmts for a
411 vector shift with scalar shift operand. */
412 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
413 || rhs_code
== LROTATE_EXPR
414 || rhs_code
== RROTATE_EXPR
)
416 vec_mode
= TYPE_MODE (vectype
);
418 /* First see if we have a vector/vector shift. */
419 optab
= optab_for_tree_code (rhs_code
, vectype
,
423 || (optab
->handlers
[(int) vec_mode
].insn_code
424 == CODE_FOR_nothing
))
426 /* No vector/vector shift, try for a vector/scalar shift. */
427 optab
= optab_for_tree_code (rhs_code
, vectype
,
432 if (vect_print_dump_info (REPORT_SLP
))
433 fprintf (vect_dump
, "Build SLP failed: no optab.");
436 icode
= (int) optab
->handlers
[(int) vec_mode
].insn_code
;
437 if (icode
== CODE_FOR_nothing
)
439 if (vect_print_dump_info (REPORT_SLP
))
440 fprintf (vect_dump
, "Build SLP failed: "
441 "op not supported by target.");
444 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
445 if (!VECTOR_MODE_P (optab_op2_mode
))
447 need_same_oprnds
= true;
448 first_op1
= gimple_assign_rhs2 (stmt
);
455 if (first_stmt_code
!= rhs_code
456 && (first_stmt_code
!= IMAGPART_EXPR
457 || rhs_code
!= REALPART_EXPR
)
458 && (first_stmt_code
!= REALPART_EXPR
459 || rhs_code
!= IMAGPART_EXPR
))
461 if (vect_print_dump_info (REPORT_SLP
))
464 "Build SLP failed: different operation in stmt ");
465 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
472 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
474 if (vect_print_dump_info (REPORT_SLP
))
477 "Build SLP failed: different shift arguments in ");
478 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
485 /* Strided store or load. */
486 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
488 if (REFERENCE_CLASS_P (lhs
))
491 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
492 stmt
, &def_stmts0
, &def_stmts1
,
495 &first_stmt_def0_type
,
496 &first_stmt_def1_type
,
497 &first_stmt_const_oprnd
,
499 &pattern0
, &pattern1
))
505 /* FORNOW: Check that there is no gap between the loads. */
506 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
507 && DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
508 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != stmt
509 && DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
511 if (vect_print_dump_info (REPORT_SLP
))
513 fprintf (vect_dump
, "Build SLP failed: strided "
515 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
521 /* Check that the size of interleaved loads group is not
522 greater than the SLP group size. */
523 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
525 if (vect_print_dump_info (REPORT_SLP
))
527 fprintf (vect_dump
, "Build SLP failed: the number of "
528 "interleaved loads is greater than"
529 " the SLP group size ");
530 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
536 first_load
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
));
539 /* Check that there are no loads from different interleaving
540 chains in the same node. The only exception is complex
542 if (prev_first_load
!= first_load
543 && rhs_code
!= REALPART_EXPR
544 && rhs_code
!= IMAGPART_EXPR
)
546 if (vect_print_dump_info (REPORT_SLP
))
548 fprintf (vect_dump
, "Build SLP failed: different "
549 "interleaving chains in one node ");
550 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
557 prev_first_load
= first_load
;
559 if (first_load
== stmt
)
561 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
562 if (vect_supportable_dr_alignment (first_dr
)
563 == dr_unaligned_unsupported
)
565 if (vect_print_dump_info (REPORT_SLP
))
567 fprintf (vect_dump
, "Build SLP failed: unsupported "
569 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
575 /* Analyze costs (for the first stmt in the group). */
576 vect_model_load_cost (vinfo_for_stmt (stmt
),
577 ncopies_for_cost
, *node
);
580 /* Store the place of this load in the interleaving chain. In
581 case that permutation is needed we later decide if a specific
582 permutation is supported. */
583 load_place
= vect_get_place_in_interleaving_chain (stmt
,
588 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
590 /* We stop the tree when we reach a group of loads. */
591 stop_recursion
= true;
594 } /* Strided access. */
597 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
599 /* Not strided load. */
600 if (vect_print_dump_info (REPORT_SLP
))
602 fprintf (vect_dump
, "Build SLP failed: not strided load ");
603 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
606 /* FORNOW: Not strided loads are not supported. */
610 /* Not memory operation. */
611 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
612 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
)
614 if (vect_print_dump_info (REPORT_SLP
))
616 fprintf (vect_dump
, "Build SLP failed: operation");
617 fprintf (vect_dump
, " unsupported ");
618 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
624 /* Find the def-stmts. */
625 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
626 &def_stmts0
, &def_stmts1
,
627 &first_stmt_dt0
, &first_stmt_dt1
,
628 &first_stmt_def0_type
,
629 &first_stmt_def1_type
,
630 &first_stmt_const_oprnd
,
632 &pattern0
, &pattern1
))
637 /* Add the costs of the node to the overall instance costs. */
638 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
639 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
641 /* Strided loads were reached - stop the recursion. */
646 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
647 *inside_cost
+= TARG_VEC_PERMUTE_COST
* group_size
;
653 /* Create SLP_TREE nodes for the definition node/s. */
654 if (first_stmt_dt0
== vect_internal_def
)
656 slp_tree left_node
= XNEW (struct _slp_tree
);
657 SLP_TREE_SCALAR_STMTS (left_node
) = def_stmts0
;
658 SLP_TREE_VEC_STMTS (left_node
) = NULL
;
659 SLP_TREE_LEFT (left_node
) = NULL
;
660 SLP_TREE_RIGHT (left_node
) = NULL
;
661 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node
) = 0;
662 SLP_TREE_INSIDE_OF_LOOP_COST (left_node
) = 0;
663 if (!vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &left_node
, group_size
,
664 inside_cost
, outside_cost
, ncopies_for_cost
,
665 max_nunits
, load_permutation
, loads
,
666 vectorization_factor
))
669 SLP_TREE_LEFT (*node
) = left_node
;
672 if (first_stmt_dt1
== vect_internal_def
)
674 slp_tree right_node
= XNEW (struct _slp_tree
);
675 SLP_TREE_SCALAR_STMTS (right_node
) = def_stmts1
;
676 SLP_TREE_VEC_STMTS (right_node
) = NULL
;
677 SLP_TREE_LEFT (right_node
) = NULL
;
678 SLP_TREE_RIGHT (right_node
) = NULL
;
679 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node
) = 0;
680 SLP_TREE_INSIDE_OF_LOOP_COST (right_node
) = 0;
681 if (!vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &right_node
, group_size
,
682 inside_cost
, outside_cost
, ncopies_for_cost
,
683 max_nunits
, load_permutation
, loads
,
684 vectorization_factor
))
687 SLP_TREE_RIGHT (*node
) = right_node
;
695 vect_print_slp_tree (slp_tree node
)
703 fprintf (vect_dump
, "node ");
704 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
706 fprintf (vect_dump
, "\n\tstmt %d ", i
);
707 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
709 fprintf (vect_dump
, "\n");
711 vect_print_slp_tree (SLP_TREE_LEFT (node
));
712 vect_print_slp_tree (SLP_TREE_RIGHT (node
));
716 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
717 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
718 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
719 stmts in NODE are to be marked. */
722 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
730 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
732 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
734 vect_mark_slp_stmts (SLP_TREE_LEFT (node
), mark
, j
);
735 vect_mark_slp_stmts (SLP_TREE_RIGHT (node
), mark
, j
);
739 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
742 vect_mark_slp_stmts_relevant (slp_tree node
)
746 stmt_vec_info stmt_info
;
751 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
753 stmt_info
= vinfo_for_stmt (stmt
);
754 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
755 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
756 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
759 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node
));
760 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node
));
764 /* Check if the permutation required by the SLP INSTANCE is supported.
765 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
768 vect_supported_slp_permutation_p (slp_instance instance
)
770 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
771 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
772 gimple first_load
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
));
773 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
775 slp_tree
*tmp_loads
= NULL
;
776 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
779 /* FORNOW: The only supported loads permutation is loads from the same
780 location in all the loads in the node, when the data-refs in
781 nodes of LOADS constitute an interleaving chain.
782 Sort the nodes according to the order of accesses in the chain. */
783 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
785 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
786 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
787 i
+= group_size
, j
++)
789 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
790 /* Check that the loads are all in the same interleaving chain. */
791 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt
)) != first_load
)
793 if (vect_print_dump_info (REPORT_DETAILS
))
795 fprintf (vect_dump
, "Build SLP failed: unsupported data "
797 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
804 tmp_loads
[index
] = load
;
807 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
808 for (i
= 0; i
< group_size
; i
++)
809 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
811 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
812 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
815 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
816 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
824 /* Rearrange the statements of NODE according to PERMUTATION. */
827 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
828 VEC (int, heap
) *permutation
)
831 VEC (gimple
, heap
) *tmp_stmts
;
832 unsigned int index
, i
;
837 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node
), group_size
, permutation
);
838 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node
), group_size
, permutation
);
840 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
841 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
843 for (i
= 0; i
< group_size
; i
++)
844 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
846 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
848 index
= VEC_index (int, permutation
, i
);
849 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
852 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
853 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
857 /* Check if the required load permutation is supported.
858 LOAD_PERMUTATION contains a list of indices of the loads.
859 In SLP this permutation is relative to the order of strided stores that are
860 the base of the SLP instance. */
863 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
864 VEC (int, heap
) *load_permutation
)
866 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
867 bool supported
, bad_permutation
= false;
872 /* FORNOW: permutations are only supported in SLP. */
876 if (vect_print_dump_info (REPORT_SLP
))
878 fprintf (vect_dump
, "Load permutation ");
879 for (i
= 0; VEC_iterate (int, load_permutation
, i
, next
); i
++)
880 fprintf (vect_dump
, "%d ", next
);
883 /* In case of reduction every load permutation is allowed, since the order
884 of the reduction statements is not important (as opposed to the case of
885 strided stores). The only condition we need to check is that all the
886 load nodes are of the same size and have the same permutation (and then
887 rearrange all the nodes of the SLP instance according to this
890 /* Check that all the load nodes are of the same size. */
892 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
);
894 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
895 != (unsigned) group_size
)
898 node
= SLP_INSTANCE_TREE (slp_instn
);
899 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
900 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
901 instance, not all the loads belong to the same node or interleaving
902 group. Hence, we need to divide them into groups according to
904 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
906 /* Reduction (there are no data-refs in the root). */
907 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
909 int first_group_load_index
;
911 /* Compare all the permutation sequences to the first one. */
912 for (i
= 1; i
< number_of_groups
; i
++)
915 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
917 next
= VEC_index (int, load_permutation
, j
);
918 first_group_load_index
= VEC_index (int, load_permutation
, k
);
920 if (next
!= first_group_load_index
)
922 bad_permutation
= true;
933 if (!bad_permutation
)
935 /* This permutaion is valid for reduction. Since the order of the
936 statements in the nodes is not important unless they are memory
937 accesses, we can rearrange the statements in all the nodes
938 according to the order of the loads. */
939 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
941 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
946 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
947 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
948 well (unless it's reduction). */
949 if (VEC_length (int, load_permutation
)
950 != (unsigned int) (group_size
* group_size
))
954 load_index
= sbitmap_alloc (group_size
);
955 sbitmap_zero (load_index
);
956 for (j
= 0; j
< group_size
; j
++)
958 for (i
= j
* group_size
, k
= 0;
959 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
962 if (i
!= j
* group_size
&& next
!= prev
)
971 if (TEST_BIT (load_index
, prev
))
977 SET_BIT (load_index
, prev
);
980 for (j
= 0; j
< group_size
; j
++)
981 if (!TEST_BIT (load_index
, j
))
984 sbitmap_free (load_index
);
986 if (supported
&& i
== group_size
* group_size
987 && vect_supported_slp_permutation_p (slp_instn
))
994 /* Find the first load in the loop that belongs to INSTANCE.
995 When loads are in several SLP nodes, there can be a case in which the first
996 load does not appear in the first SLP node to be transformed, causing
997 incorrect order of statements. Since we generate all the loads together,
998 they must be inserted before the first load of the SLP instance and not
999 before the first load of the first node of the instance. */
1001 vect_find_first_load_in_slp_instance (slp_instance instance
)
1005 gimple first_load
= NULL
, load
;
1008 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
);
1011 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
);
1013 first_load
= get_earlier_stmt (load
, first_load
);
1019 /* Analyze an SLP instance starting from a group of strided stores. Call
1020 vect_build_slp_tree to build a tree of packed stmts if possible.
1021 Return FALSE if it's impossible to SLP any stmt in the loop. */
1024 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1027 slp_instance new_instance
;
1028 slp_tree node
= XNEW (struct _slp_tree
);
1029 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
1030 unsigned int unrolling_factor
= 1, nunits
;
1031 tree vectype
, scalar_type
= NULL_TREE
;
1033 unsigned int vectorization_factor
= 0;
1034 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1035 unsigned int max_nunits
= 0;
1036 VEC (int, heap
) *load_permutation
;
1037 VEC (slp_tree
, heap
) *loads
;
1038 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1042 scalar_type
= TREE_TYPE (DR_REF (dr
));
1043 vectype
= get_vectype_for_scalar_type (scalar_type
);
1044 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
1048 gcc_assert (loop_vinfo
);
1049 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1050 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1055 if (vect_print_dump_info (REPORT_SLP
))
1057 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1058 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1064 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1066 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1068 /* No multitypes in BB SLP. */
1069 vectorization_factor
= nunits
;
1071 /* Calculate the unrolling factor. */
1072 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1073 if (unrolling_factor
!= 1 && !loop_vinfo
)
1075 if (vect_print_dump_info (REPORT_SLP
))
1076 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1082 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1083 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (gimple
, heap
, group_size
);
1087 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1090 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1091 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1096 /* Collect reduction statements. */
1097 for (i
= 0; VEC_iterate (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
,
1101 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1102 if (vect_print_dump_info (REPORT_DETAILS
))
1104 fprintf (vect_dump
, "pushing reduction into node: ");
1105 print_gimple_stmt (vect_dump
, next
, 0, TDF_SLIM
);
1110 SLP_TREE_VEC_STMTS (node
) = NULL
;
1111 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
1112 SLP_TREE_LEFT (node
) = NULL
;
1113 SLP_TREE_RIGHT (node
) = NULL
;
1114 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
1115 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
1117 /* Calculate the number of vector stmts to create based on the unrolling
1118 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1119 GROUP_SIZE / NUNITS otherwise. */
1120 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1122 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1123 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1125 /* Build the tree for the SLP instance. */
1126 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1127 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1128 &max_nunits
, &load_permutation
, &loads
,
1129 vectorization_factor
))
1131 /* Create a new SLP instance. */
1132 new_instance
= XNEW (struct _slp_instance
);
1133 SLP_INSTANCE_TREE (new_instance
) = node
;
1134 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1135 /* Calculate the unrolling factor based on the smallest type in the
1137 if (max_nunits
> nunits
)
1138 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1141 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1142 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1143 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1144 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1145 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1146 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1147 if (VEC_length (slp_tree
, loads
))
1149 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1152 if (vect_print_dump_info (REPORT_SLP
))
1154 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1156 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1159 vect_free_slp_instance (new_instance
);
1163 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1164 = vect_find_first_load_in_slp_instance (new_instance
);
1167 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1170 VEC_safe_push (slp_instance
, heap
,
1171 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1174 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1177 if (vect_print_dump_info (REPORT_SLP
))
1178 vect_print_slp_tree (node
);
1183 /* Failed to SLP. */
1184 /* Free the allocated memory. */
1185 vect_free_slp_tree (node
);
1186 VEC_free (int, heap
, load_permutation
);
1187 VEC_free (slp_tree
, heap
, loads
);
1193 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1194 trees of packed scalar stmts if SLP is possible. */
1197 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1200 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
;
1204 if (vect_print_dump_info (REPORT_SLP
))
1205 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1209 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1210 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1213 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1215 /* Find SLP sequences starting from groups of strided stores. */
1216 for (i
= 0; VEC_iterate (gimple
, strided_stores
, i
, store
); i
++)
1217 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, store
))
1220 if (bb_vinfo
&& !ok
)
1222 if (vect_print_dump_info (REPORT_SLP
))
1223 fprintf (vect_dump
, "Failed to SLP the basic block.");
1228 /* Find SLP sequences starting from groups of reductions. */
1229 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1230 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1231 VEC_index (gimple
, reductions
, 0)))
1238 /* For each possible SLP instance decide whether to SLP it and calculate overall
1239 unrolling factor needed to SLP the loop. */
1242 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1244 unsigned int i
, unrolling_factor
= 1;
1245 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1246 slp_instance instance
;
1247 int decided_to_slp
= 0;
1249 if (vect_print_dump_info (REPORT_SLP
))
1250 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1252 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1254 /* FORNOW: SLP if you can. */
1255 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1256 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1258 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1259 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1260 loop-based vectorization. Such stmts will be marked as HYBRID. */
1261 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1265 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1267 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1268 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1269 decided_to_slp
, unrolling_factor
);
1273 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1274 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1277 vect_detect_hybrid_slp_stmts (slp_tree node
)
1281 imm_use_iterator imm_iter
;
1283 stmt_vec_info stmt_vinfo
;
1288 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
1289 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1290 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1291 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1292 if ((stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1293 && !STMT_SLP_TYPE (stmt_vinfo
)
1294 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1295 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1296 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1297 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt
))
1298 == vect_reduction_def
))
1299 vect_mark_slp_stmts (node
, hybrid
, i
);
1301 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node
));
1302 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node
));
1306 /* Find stmts that must be both vectorized and SLPed. */
1309 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1312 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1313 slp_instance instance
;
1315 if (vect_print_dump_info (REPORT_SLP
))
1316 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1318 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1319 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1323 /* Create and initialize a new bb_vec_info struct for BB, as well as
1324 stmt_vec_info structs for all the stmts in it. */
1327 new_bb_vec_info (basic_block bb
)
1329 bb_vec_info res
= NULL
;
1330 gimple_stmt_iterator gsi
;
1332 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1333 BB_VINFO_BB (res
) = bb
;
1335 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1337 gimple stmt
= gsi_stmt (gsi
);
1338 gimple_set_uid (stmt
, 0);
1339 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1342 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1343 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1350 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1351 stmts in the basic block. */
1354 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1357 gimple_stmt_iterator si
;
1362 bb
= BB_VINFO_BB (bb_vinfo
);
1364 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1366 gimple stmt
= gsi_stmt (si
);
1367 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1370 /* Free stmt_vec_info. */
1371 free_stmt_vec_info (stmt
);
1374 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1375 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1381 /* Analyze statements contained in SLP tree node after recursively analyzing
1382 the subtree. Return TRUE if the operations are supported. */
1385 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1394 if (!vect_slp_analyze_node_operations (bb_vinfo
, SLP_TREE_LEFT (node
))
1395 || !vect_slp_analyze_node_operations (bb_vinfo
, SLP_TREE_RIGHT (node
)))
1398 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
1400 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1401 gcc_assert (stmt_info
);
1402 gcc_assert (PURE_SLP_STMT (stmt_info
));
1404 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1412 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1413 operations are supported. */
1416 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1418 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1419 slp_instance instance
;
1422 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1424 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1425 SLP_INSTANCE_TREE (instance
)))
1427 vect_free_slp_instance (instance
);
1428 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1434 if (!VEC_length (slp_instance
, slp_instances
))
1441 /* Cheick if the basic block can be vectorized. */
1444 vect_slp_analyze_bb (basic_block bb
)
1446 bb_vec_info bb_vinfo
;
1447 VEC (ddr_p
, heap
) *ddrs
;
1448 VEC (slp_instance
, heap
) *slp_instances
;
1449 slp_instance instance
;
1451 gimple_stmt_iterator gsi
;
1453 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1455 if (vect_print_dump_info (REPORT_DETAILS
))
1456 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
1458 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1460 gimple stmt
= gsi_stmt (gsi
);
1461 if (!is_gimple_debug (stmt
)
1462 && !gimple_nop_p (stmt
)
1463 && gimple_code (stmt
) != GIMPLE_LABEL
)
1467 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
1469 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1470 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
1476 bb_vinfo
= new_bb_vec_info (bb
);
1480 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
1482 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1483 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
1486 destroy_bb_vec_info (bb_vinfo
);
1490 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
1491 if (!VEC_length (ddr_p
, ddrs
))
1493 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1494 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
1497 destroy_bb_vec_info (bb_vinfo
);
1501 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
)
1504 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1505 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
1506 "in basic block.\n");
1508 destroy_bb_vec_info (bb_vinfo
);
1512 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
1514 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1515 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
1518 destroy_bb_vec_info (bb_vinfo
);
1522 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
1524 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1525 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
1528 destroy_bb_vec_info (bb_vinfo
);
1532 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
1534 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1535 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
1538 destroy_bb_vec_info (bb_vinfo
);
1542 /* Check the SLP opportunities in the basic block, analyze and build SLP
1544 if (!vect_analyze_slp (NULL
, bb_vinfo
))
1546 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1547 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
1548 "in basic block.\n");
1550 destroy_bb_vec_info (bb_vinfo
);
1554 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1556 /* Mark all the statements that we want to vectorize as pure SLP and
1558 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1560 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1561 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
1564 if (!vect_slp_analyze_operations (bb_vinfo
))
1566 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1567 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
1569 destroy_bb_vec_info (bb_vinfo
);
1573 if (vect_print_dump_info (REPORT_DETAILS
))
1574 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
1580 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1581 the number of created vector stmts depends on the unrolling factor). However,
1582 the actual number of vector stmts for every SLP node depends on VF which is
1583 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1584 In this function we assume that the inside costs calculated in
1585 vect_model_xxx_cost are linear in ncopies. */
1588 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
1590 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1591 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1592 slp_instance instance
;
1594 if (vect_print_dump_info (REPORT_SLP
))
1595 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
1597 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1598 /* We assume that costs are linear in ncopies. */
1599 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
1600 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1604 /* For constant and loop invariant defs of SLP_NODE this function returns
1605 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1606 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1607 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1608 REDUC_INDEX is the index of the reduction operand in the statements, unless
1612 vect_get_constant_vectors (slp_tree slp_node
, VEC(tree
,heap
) **vec_oprnds
,
1613 unsigned int op_num
, unsigned int number_of_vectors
,
1616 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
1617 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1618 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1622 int j
, number_of_places_left_in_vector
;
1625 int group_size
= VEC_length (gimple
, stmts
);
1626 unsigned int vec_num
, i
;
1627 int number_of_copies
= 1;
1628 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
1629 bool constant_p
, is_store
;
1630 tree neutral_op
= NULL
;
1632 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
1634 enum tree_code code
= gimple_assign_rhs_code (stmt
);
1635 if (reduc_index
== -1)
1637 VEC_free (tree
, heap
, *vec_oprnds
);
1641 op_num
= reduc_index
- 1;
1642 op
= gimple_op (stmt
, op_num
+ 1);
1643 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1644 we need either neutral operands or the original operands. See
1645 get_initial_def_for_reduction() for details. */
1648 case WIDEN_SUM_EXPR
:
1654 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
1655 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
1657 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
1663 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
1664 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
1666 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
1675 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
1678 op
= gimple_assign_rhs1 (stmt
);
1683 op
= gimple_op (stmt
, op_num
+ 1);
1686 if (CONSTANT_CLASS_P (op
))
1691 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1692 gcc_assert (vector_type
);
1694 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1696 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1697 created vectors. It is greater than 1 if unrolling is performed.
1699 For example, we have two scalar operands, s1 and s2 (e.g., group of
1700 strided accesses of size two), while NUNITS is four (i.e., four scalars
1701 of this type can be packed in a vector). The output vector will contain
1702 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1705 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1706 containing the operands.
1708 For example, NUNITS is four as before, and the group size is 8
1709 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1710 {s5, s6, s7, s8}. */
1712 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
1714 number_of_places_left_in_vector
= nunits
;
1715 for (j
= 0; j
< number_of_copies
; j
++)
1717 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
1720 op
= gimple_assign_rhs1 (stmt
);
1722 op
= gimple_op (stmt
, op_num
+ 1);
1724 if (reduc_index
!= -1)
1726 struct loop
*loop
= (gimple_bb (stmt
))->loop_father
;
1727 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
1730 /* Get the def before the loop. */
1731 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
1732 loop_preheader_edge (loop
));
1733 if (j
!= (number_of_copies
- 1) && neutral_op
)
1737 /* Create 'vect_ = {op0,op1,...,opn}'. */
1738 t
= tree_cons (NULL_TREE
, op
, t
);
1740 number_of_places_left_in_vector
--;
1742 if (number_of_places_left_in_vector
== 0)
1744 number_of_places_left_in_vector
= nunits
;
1747 vec_cst
= build_vector (vector_type
, t
);
1749 vec_cst
= build_constructor_from_list (vector_type
, t
);
1750 VEC_quick_push (tree
, voprnds
,
1751 vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
));
1757 /* Since the vectors are created in the reverse order, we should invert
1759 vec_num
= VEC_length (tree
, voprnds
);
1760 for (j
= vec_num
- 1; j
>= 0; j
--)
1762 vop
= VEC_index (tree
, voprnds
, j
);
1763 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1766 VEC_free (tree
, heap
, voprnds
);
1768 /* In case that VF is greater than the unrolling factor needed for the SLP
1769 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1770 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1771 to replicate the vectors. */
1772 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
1774 tree neutral_vec
= NULL
;
1781 for (i
= 0; i
< (unsigned) nunits
; i
++)
1782 t
= tree_cons (NULL_TREE
, neutral_op
, t
);
1783 neutral_vec
= build_vector (vector_type
, t
);
1786 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
1790 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
1791 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1797 /* Get vectorized definitions from SLP_NODE that contains corresponding
1798 vectorized def-stmts. */
1801 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
1804 gimple vec_def_stmt
;
1807 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
1810 VEC_iterate (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
);
1813 gcc_assert (vec_def_stmt
);
1814 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
1815 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
1820 /* Get vectorized definitions for SLP_NODE.
1821 If the scalar definitions are loop invariants or constants, collect them and
1822 call vect_get_constant_vectors() to create vector stmts.
1823 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1824 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1825 vect_get_slp_vect_defs() to retrieve them.
1826 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1827 the right node. This is used when the second operand must remain scalar. */
1830 vect_get_slp_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds0
,
1831 VEC (tree
,heap
) **vec_oprnds1
, int reduc_index
)
1834 enum tree_code code
;
1835 int number_of_vects
;
1836 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
1838 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
1839 /* The number of vector defs is determined by the number of vector statements
1840 in the node from which we get those statements. */
1841 if (SLP_TREE_LEFT (slp_node
))
1842 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node
));
1845 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1846 /* Number of vector stmts was calculated according to LHS in
1847 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1848 necessary. See vect_get_smallest_scalar_type() for details. */
1849 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
1851 if (rhs_size_unit
!= lhs_size_unit
)
1853 number_of_vects
*= rhs_size_unit
;
1854 number_of_vects
/= lhs_size_unit
;
1858 /* Allocate memory for vectorized defs. */
1859 *vec_oprnds0
= VEC_alloc (tree
, heap
, number_of_vects
);
1861 /* SLP_NODE corresponds either to a group of stores or to a group of
1862 unary/binary operations. We don't call this function for loads.
1863 For reduction defs we call vect_get_constant_vectors(), since we are
1864 looking for initial loop invariant values. */
1865 if (SLP_TREE_LEFT (slp_node
) && reduc_index
== -1)
1866 /* The defs are already vectorized. */
1867 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node
), vec_oprnds0
);
1869 /* Build vectors from scalar defs. */
1870 vect_get_constant_vectors (slp_node
, vec_oprnds0
, 0, number_of_vects
,
1873 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
)))
1874 /* Since we don't call this function with loads, this is a group of
1878 /* For reductions, we only need initial values. */
1879 if (reduc_index
!= -1)
1882 code
= gimple_assign_rhs_code (first_stmt
);
1883 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
|| !vec_oprnds1
)
1886 /* The number of vector defs is determined by the number of vector statements
1887 in the node from which we get those statements. */
1888 if (SLP_TREE_RIGHT (slp_node
))
1889 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node
));
1891 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1893 *vec_oprnds1
= VEC_alloc (tree
, heap
, number_of_vects
);
1895 if (SLP_TREE_RIGHT (slp_node
))
1896 /* The defs are already vectorized. */
1897 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node
), vec_oprnds1
);
1899 /* Build vectors from scalar defs. */
1900 vect_get_constant_vectors (slp_node
, vec_oprnds1
, 1, number_of_vects
, -1);
1904 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1905 building a vector of type MASK_TYPE from it) and two input vectors placed in
1906 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1907 shifting by STRIDE elements of DR_CHAIN for every copy.
1908 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1910 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1911 the created stmts must be inserted. */
1914 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
1915 tree mask
, int first_vec_indx
, int second_vec_indx
,
1916 gimple_stmt_iterator
*gsi
, slp_tree node
,
1917 tree builtin_decl
, tree vectype
,
1918 VEC(tree
,heap
) *dr_chain
,
1919 int ncopies
, int vect_stmts_counter
)
1922 gimple perm_stmt
= NULL
;
1923 stmt_vec_info next_stmt_info
;
1925 tree first_vec
, second_vec
, data_ref
;
1926 VEC (tree
, heap
) *params
= NULL
;
1928 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
1930 /* Initialize the vect stmts of NODE to properly insert the generated
1932 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
1933 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
1934 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
1936 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
1937 for (i
= 0; i
< ncopies
; i
++)
1939 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
1940 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
1942 /* Build argument list for the vectorized call. */
1943 VEC_free (tree
, heap
, params
);
1944 params
= VEC_alloc (tree
, heap
, 3);
1945 VEC_quick_push (tree
, params
, first_vec
);
1946 VEC_quick_push (tree
, params
, second_vec
);
1947 VEC_quick_push (tree
, params
, mask
);
1949 /* Generate the permute statement. */
1950 perm_stmt
= gimple_build_call_vec (builtin_decl
, params
);
1951 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
1952 gimple_call_set_lhs (perm_stmt
, data_ref
);
1953 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
1955 /* Store the vector statement in NODE. */
1956 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
1957 stride
* i
+ vect_stmts_counter
, perm_stmt
);
1959 first_vec_indx
+= stride
;
1960 second_vec_indx
+= stride
;
1963 /* Mark the scalar stmt as vectorized. */
1964 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
1965 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
1969 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1970 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1971 representation. Check that the mask is valid and return FALSE if not.
1972 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1973 the next vector, i.e., the current first vector is not needed. */
1976 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
1977 int mask_nunits
, bool only_one_vec
, int index
,
1978 int *mask
, int *current_mask_element
,
1979 bool *need_next_vector
)
1982 static int number_of_mask_fixes
= 1;
1983 static bool mask_fixed
= false;
1984 static bool needs_first_vector
= false;
1986 /* Convert to target specific representation. */
1987 *current_mask_element
= first_mask_element
+ m
;
1988 /* Adjust the value in case it's a mask for second and third vectors. */
1989 *current_mask_element
-= mask_nunits
* (number_of_mask_fixes
- 1);
1991 if (*current_mask_element
< mask_nunits
)
1992 needs_first_vector
= true;
1994 /* We have only one input vector to permute but the mask accesses values in
1995 the next vector as well. */
1996 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
1998 if (vect_print_dump_info (REPORT_DETAILS
))
2000 fprintf (vect_dump
, "permutation requires at least two vectors ");
2001 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2007 /* The mask requires the next vector. */
2008 if (*current_mask_element
>= mask_nunits
* 2)
2010 if (needs_first_vector
|| mask_fixed
)
2012 /* We either need the first vector too or have already moved to the
2013 next vector. In both cases, this permutation needs three
2015 if (vect_print_dump_info (REPORT_DETAILS
))
2017 fprintf (vect_dump
, "permutation requires at "
2018 "least three vectors ");
2019 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2025 /* We move to the next vector, dropping the first one and working with
2026 the second and the third - we need to adjust the values of the mask
2028 *current_mask_element
-= mask_nunits
* number_of_mask_fixes
;
2030 for (i
= 0; i
< index
; i
++)
2031 mask
[i
] -= mask_nunits
* number_of_mask_fixes
;
2033 (number_of_mask_fixes
)++;
2037 *need_next_vector
= mask_fixed
;
2039 /* This was the last element of this mask. Start a new one. */
2040 if (index
== mask_nunits
- 1)
2042 number_of_mask_fixes
= 1;
2044 needs_first_vector
= false;
2051 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2052 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2053 permute statements for SLP_NODE_INSTANCE. */
2055 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
2056 gimple_stmt_iterator
*gsi
, int vf
,
2057 slp_instance slp_node_instance
, bool analyze_only
)
2059 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2060 tree mask_element_type
= NULL_TREE
, mask_type
;
2061 int i
, j
, k
, m
, scale
, mask_nunits
, nunits
, vec_index
= 0, scalar_index
;
2063 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), builtin_decl
;
2064 gimple next_scalar_stmt
;
2065 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2066 int first_mask_element
;
2067 int index
, unroll_factor
, *mask
, current_mask_element
, ncopies
;
2068 bool only_one_vec
= false, need_next_vector
= false;
2069 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2071 if (!targetm
.vectorize
.builtin_vec_perm
)
2073 if (vect_print_dump_info (REPORT_DETAILS
))
2075 fprintf (vect_dump
, "no builtin for vect permute for ");
2076 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2082 builtin_decl
= targetm
.vectorize
.builtin_vec_perm (vectype
,
2083 &mask_element_type
);
2084 if (!builtin_decl
|| !mask_element_type
)
2086 if (vect_print_dump_info (REPORT_DETAILS
))
2088 fprintf (vect_dump
, "no builtin for vect permute for ");
2089 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2095 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2096 mask_nunits
= TYPE_VECTOR_SUBPARTS (mask_type
);
2097 mask
= (int *) xmalloc (sizeof (int) * mask_nunits
);
2098 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2099 scale
= mask_nunits
/ nunits
;
2100 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2102 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2103 unrolling factor. */
2104 orig_vec_stmts_num
= group_size
*
2105 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2106 if (orig_vec_stmts_num
== 1)
2107 only_one_vec
= true;
2109 /* Number of copies is determined by the final vectorization factor
2110 relatively to SLP_NODE_INSTANCE unrolling factor. */
2111 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2113 /* Generate permutation masks for every NODE. Number of masks for each NODE
2114 is equal to GROUP_SIZE.
2115 E.g., we have a group of three nodes with three loads from the same
2116 location in each node, and the vector size is 4. I.e., we have a
2117 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2118 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2119 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2122 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2123 scpecific type, e.g., in bytes for Altivec.
2124 The last mask is illegal since we assume two operands for permute
2125 operation, and the mask element values can't be outside that range. Hence,
2126 the last mask must be converted into {2,5,5,5}.
2127 For the first two permutations we need the first and the second input
2128 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2129 we need the second and the third vectors: {b1,c1,a2,b2} and
2133 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
),
2139 vect_stmts_counter
= 0;
2141 first_vec_index
= vec_index
++;
2143 second_vec_index
= first_vec_index
;
2145 second_vec_index
= vec_index
++;
2147 for (j
= 0; j
< unroll_factor
; j
++)
2149 for (k
= 0; k
< group_size
; k
++)
2151 first_mask_element
= (i
+ j
* group_size
) * scale
;
2152 for (m
= 0; m
< scale
; m
++)
2154 if (!vect_get_mask_element (stmt
, first_mask_element
, m
,
2155 mask_nunits
, only_one_vec
, index
, mask
,
2156 ¤t_mask_element
, &need_next_vector
))
2159 mask
[index
++] = current_mask_element
;
2162 if (index
== mask_nunits
)
2164 tree mask_vec
= NULL
;
2166 while (--index
>= 0)
2168 tree t
= build_int_cst (mask_element_type
, mask
[index
]);
2169 mask_vec
= tree_cons (NULL
, t
, mask_vec
);
2171 mask_vec
= build_vector (mask_type
, mask_vec
);
2174 if (!targetm
.vectorize
.builtin_vec_perm_ok (vectype
,
2177 if (vect_print_dump_info (REPORT_DETAILS
))
2179 fprintf (vect_dump
, "unsupported vect permute ");
2180 print_generic_expr (vect_dump
, mask_vec
, 0);
2188 if (need_next_vector
)
2190 first_vec_index
= second_vec_index
;
2191 second_vec_index
= vec_index
;
2194 next_scalar_stmt
= VEC_index (gimple
,
2195 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
2197 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2198 mask_vec
, first_vec_index
, second_vec_index
,
2199 gsi
, node
, builtin_decl
, vectype
, dr_chain
,
2200 ncopies
, vect_stmts_counter
++);
2213 /* Vectorize SLP instance tree in postorder. */
2216 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2217 unsigned int vectorization_factor
)
2220 bool strided_store
, is_store
;
2221 gimple_stmt_iterator si
;
2222 stmt_vec_info stmt_info
;
2223 unsigned int vec_stmts_size
, nunits
, group_size
;
2226 slp_tree loads_node
;
2231 vect_schedule_slp_instance (SLP_TREE_LEFT (node
), instance
,
2232 vectorization_factor
);
2233 vect_schedule_slp_instance (SLP_TREE_RIGHT (node
), instance
,
2234 vectorization_factor
);
2236 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
2237 stmt_info
= vinfo_for_stmt (stmt
);
2239 /* VECTYPE is the type of the destination. */
2240 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2241 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
2242 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2244 /* For each SLP instance calculate number of vector stmts to be created
2245 for the scalar stmts in each node of the SLP tree. Number of vector
2246 elements in one vector iteration is the number of scalar elements in
2247 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2249 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
2251 /* In case of load permutation we have to allocate vectorized statements for
2252 all the nodes that participate in that permutation. */
2253 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2256 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
);
2259 if (!SLP_TREE_VEC_STMTS (loads_node
))
2261 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
2263 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
2268 if (!SLP_TREE_VEC_STMTS (node
))
2270 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
2271 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
2274 if (vect_print_dump_info (REPORT_DETAILS
))
2276 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
2277 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2280 /* Loads should be inserted before the first load. */
2281 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
2282 && STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2283 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
2284 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
2286 si
= gsi_for_stmt (stmt
);
2288 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
2294 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2296 VEC (slp_instance
, heap
) *slp_instances
;
2297 slp_instance instance
;
2299 bool is_store
= false;
2303 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2304 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2308 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2312 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
2314 /* Schedule the tree of INSTANCE. */
2315 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
2317 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
)
2318 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2319 fprintf (vect_dump
, "vectorizing stmts using SLP.");
2322 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
2324 slp_tree root
= SLP_INSTANCE_TREE (instance
);
2327 gimple_stmt_iterator gsi
;
2329 for (j
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (root
), j
, store
)
2330 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
2332 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
2335 /* Free the attached stmt_vec_info and remove the stmt. */
2336 gsi
= gsi_for_stmt (store
);
2337 gsi_remove (&gsi
, true);
2338 free_stmt_vec_info (store
);
/* NOTE(review): this span is extraction-garbled — each original source line
   is split across several physical lines and the embedded numbers are
   original line numbers.  The function's closing tokens fall outside this
   chunk, so the code is left byte-identical; only comments are added.
   Purpose (from the visible code): drive BB vectorization — walk BB's
   stmts, call vect_schedule_slp on the first SLP stmt found, then trigger
   an SSA update and free the bb_vec_info.  */
2346 /* Vectorize the basic block. */
2349 vect_slp_transform_bb (basic_block bb
)
2351 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
2352 gimple_stmt_iterator si
;
/* Analysis must have attached a bb_vec_info earlier.  */
2354 gcc_assert (bb_vinfo
);
2356 if (vect_print_dump_info (REPORT_DETAILS
))
2357 fprintf (vect_dump
, "SLPing BB\n");
/* Scan the block's statements in order.  */
2359 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2361 gimple stmt
= gsi_stmt (si
);
2362 stmt_vec_info stmt_info
;
2364 if (vect_print_dump_info (REPORT_DETAILS
))
2366 fprintf (vect_dump
, "------>SLPing statement: ");
2367 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2370 stmt_info
= vinfo_for_stmt (stmt
);
2371 gcc_assert (stmt_info
);
2373 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2374 if (STMT_SLP_TYPE (stmt_info
))
2376 vect_schedule_slp (NULL
, bb_vinfo
);
/* Vectorized stmts reference the virtual operand; flag it for renaming.  */
2381 mark_sym_for_renaming (gimple_vop (cfun
));
2382 /* The memory tags and pointers in vectorized statements need to
2383 have their SSA forms updated. FIXME, why can't this be delayed
2384 until all the loops have been transformed? */
2385 update_ssa (TODO_update_ssa
);
2387 if (vect_print_dump_info (REPORT_DETAILS
))
2388 fprintf (vect_dump
, "BASIC BLOCK VECTORIZED\n");
/* Release the per-BB vectorization info allocated during analysis.  */
2390 destroy_bb_vec_info (bb_vinfo
);