/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
46 find_bb_location (basic_block bb
)
49 gimple_stmt_iterator si
;
54 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
57 if (gimple_location (stmt
) != UNKNOWN_LOC
)
58 return gimple_location (stmt
);
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
68 vect_free_slp_tree (slp_tree node
)
73 if (SLP_TREE_LEFT (node
))
74 vect_free_slp_tree (SLP_TREE_LEFT (node
));
76 if (SLP_TREE_RIGHT (node
))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node
));
79 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
81 if (SLP_TREE_VEC_STMTS (node
))
82 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
88 /* Free the memory allocated for the SLP instance. */
91 vect_free_slp_instance (slp_instance instance
)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
94 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
95 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
105 slp_tree slp_node
, gimple stmt
,
106 VEC (gimple
, heap
) **def_stmts0
,
107 VEC (gimple
, heap
) **def_stmts1
,
108 enum vect_def_type
*first_stmt_dt0
,
109 enum vect_def_type
*first_stmt_dt1
,
110 tree
*first_stmt_def0_type
,
111 tree
*first_stmt_def1_type
,
112 tree
*first_stmt_const_oprnd
,
113 int ncopies_for_cost
,
114 bool *pattern0
, bool *pattern1
)
117 unsigned int i
, number_of_oprnds
;
120 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
121 stmt_vec_info stmt_info
=
122 vinfo_for_stmt (VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0));
123 enum gimple_rhs_class rhs_class
;
124 struct loop
*loop
= NULL
;
127 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
129 rhs_class
= get_gimple_rhs_class (gimple_assign_rhs_code (stmt
));
130 number_of_oprnds
= gimple_num_ops (stmt
) - 1; /* RHS only */
132 for (i
= 0; i
< number_of_oprnds
; i
++)
134 oprnd
= gimple_op (stmt
, i
+ 1);
136 if (!vect_is_simple_use (oprnd
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
138 || (!def_stmt
&& dt
[i
] != vect_constant_def
))
140 if (vect_print_dump_info (REPORT_SLP
))
142 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
152 if (loop
&& def_stmt
&& gimple_bb (def_stmt
)
153 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
154 && vinfo_for_stmt (def_stmt
)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
)))
157 if (!*first_stmt_dt0
)
161 if (i
== 1 && !*first_stmt_dt1
)
163 else if ((i
== 0 && !*pattern0
) || (i
== 1 && !*pattern1
))
165 if (vect_print_dump_info (REPORT_DETAILS
))
167 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
176 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
177 dt
[i
] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
179 if (*dt
== vect_unknown_def_type
)
181 if (vect_print_dump_info (REPORT_DETAILS
))
182 fprintf (vect_dump
, "Unsupported pattern.");
186 switch (gimple_code (def_stmt
))
189 def
= gimple_phi_result (def_stmt
);
193 def
= gimple_assign_lhs (def_stmt
);
197 if (vect_print_dump_info (REPORT_DETAILS
))
198 fprintf (vect_dump
, "unsupported defining stmt: ");
203 if (!*first_stmt_dt0
)
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0
= dt
[i
];
208 *first_stmt_def0_type
= TREE_TYPE (def
);
210 *first_stmt_const_oprnd
= oprnd
;
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class
!= GIMPLE_SINGLE_RHS
)
214 /* Not memory operation (we don't call this functions for loads). */
215 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dt
, slp_node
);
218 vect_model_store_cost (stmt_info
, ncopies_for_cost
, dt
[0], slp_node
);
223 if (!*first_stmt_dt1
&& i
== 1)
225 /* op1 of the first stmt of the group - store its info. */
226 *first_stmt_dt1
= dt
[i
];
228 *first_stmt_def1_type
= TREE_TYPE (def
);
231 /* We assume that the stmt contains only one constant
232 operand. We fail otherwise, to be on the safe side. */
233 if (*first_stmt_const_oprnd
)
235 if (vect_print_dump_info (REPORT_SLP
))
236 fprintf (vect_dump
, "Build SLP failed: two constant "
240 *first_stmt_const_oprnd
= oprnd
;
245 /* Not first stmt of the group, check that the def-stmt/s match
246 the def-stmt/s of the first stmt. */
248 && (*first_stmt_dt0
!= dt
[i
]
249 || (*first_stmt_def0_type
&& def
250 && !types_compatible_p (*first_stmt_def0_type
,
253 && (*first_stmt_dt1
!= dt
[i
]
254 || (*first_stmt_def1_type
&& def
255 && !types_compatible_p (*first_stmt_def1_type
,
258 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd
),
261 if (vect_print_dump_info (REPORT_SLP
))
262 fprintf (vect_dump
, "Build SLP failed: different types ");
269 /* Check the types of the definitions. */
272 case vect_constant_def
:
273 case vect_external_def
:
276 case vect_internal_def
:
277 case vect_reduction_def
:
279 VEC_safe_push (gimple
, heap
, *def_stmts0
, def_stmt
);
281 VEC_safe_push (gimple
, heap
, *def_stmts1
, def_stmt
);
285 /* FORNOW: Not supported. */
286 if (vect_print_dump_info (REPORT_SLP
))
288 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
289 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
300 /* Recursively build an SLP tree starting from NODE.
301 Fail (and return FALSE) if def-stmts are not isomorphic, require data
302 permutation or are of unsupported types of operation. Otherwise, return
306 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
307 slp_tree
*node
, unsigned int group_size
,
308 int *inside_cost
, int *outside_cost
,
309 int ncopies_for_cost
, unsigned int *max_nunits
,
310 VEC (int, heap
) **load_permutation
,
311 VEC (slp_tree
, heap
) **loads
,
312 unsigned int vectorization_factor
)
314 VEC (gimple
, heap
) *def_stmts0
= VEC_alloc (gimple
, heap
, group_size
);
315 VEC (gimple
, heap
) *def_stmts1
= VEC_alloc (gimple
, heap
, group_size
);
317 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
318 gimple stmt
= VEC_index (gimple
, stmts
, 0);
319 enum vect_def_type first_stmt_dt0
= vect_uninitialized_def
;
320 enum vect_def_type first_stmt_dt1
= vect_uninitialized_def
;
321 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
322 tree first_stmt_def1_type
= NULL_TREE
, first_stmt_def0_type
= NULL_TREE
;
324 bool stop_recursion
= false, need_same_oprnds
= false;
325 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
326 unsigned int ncopies
;
329 enum machine_mode optab_op2_mode
;
330 enum machine_mode vec_mode
;
331 tree first_stmt_const_oprnd
= NULL_TREE
;
332 struct data_reference
*first_dr
;
333 bool pattern0
= false, pattern1
= false;
335 bool permutation
= false;
336 unsigned int load_place
;
337 gimple first_load
, prev_first_load
= NULL
;
339 /* For every stmt in NODE find its def stmt/s. */
340 for (i
= 0; VEC_iterate (gimple
, stmts
, i
, stmt
); i
++)
342 if (vect_print_dump_info (REPORT_SLP
))
344 fprintf (vect_dump
, "Build SLP for ");
345 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
348 /* Fail to vectorize statements marked as unvectorizable. */
349 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
351 if (vect_print_dump_info (REPORT_SLP
))
354 "Build SLP failed: unvectorizable statement ");
355 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
361 lhs
= gimple_get_lhs (stmt
);
362 if (lhs
== NULL_TREE
)
364 if (vect_print_dump_info (REPORT_SLP
))
367 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
368 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
374 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
375 vectype
= get_vectype_for_scalar_type (scalar_type
);
378 if (vect_print_dump_info (REPORT_SLP
))
380 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
381 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
386 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
389 if (vect_print_dump_info (REPORT_SLP
))
390 fprintf (vect_dump
, "SLP with multiple types ");
392 /* FORNOW: multiple types are unsupported in BB SLP. */
397 /* In case of multiple types we need to detect the smallest type. */
398 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
399 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
401 if (is_gimple_call (stmt
))
402 rhs_code
= CALL_EXPR
;
404 rhs_code
= gimple_assign_rhs_code (stmt
);
406 /* Check the operation. */
409 first_stmt_code
= rhs_code
;
411 /* Shift arguments should be equal in all the packed stmts for a
412 vector shift with scalar shift operand. */
413 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
414 || rhs_code
== LROTATE_EXPR
415 || rhs_code
== RROTATE_EXPR
)
417 vec_mode
= TYPE_MODE (vectype
);
419 /* First see if we have a vector/vector shift. */
420 optab
= optab_for_tree_code (rhs_code
, vectype
,
424 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
426 /* No vector/vector shift, try for a vector/scalar shift. */
427 optab
= optab_for_tree_code (rhs_code
, vectype
,
432 if (vect_print_dump_info (REPORT_SLP
))
433 fprintf (vect_dump
, "Build SLP failed: no optab.");
436 icode
= (int) optab_handler (optab
, vec_mode
);
437 if (icode
== CODE_FOR_nothing
)
439 if (vect_print_dump_info (REPORT_SLP
))
440 fprintf (vect_dump
, "Build SLP failed: "
441 "op not supported by target.");
444 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
445 if (!VECTOR_MODE_P (optab_op2_mode
))
447 need_same_oprnds
= true;
448 first_op1
= gimple_assign_rhs2 (stmt
);
455 if (first_stmt_code
!= rhs_code
456 && (first_stmt_code
!= IMAGPART_EXPR
457 || rhs_code
!= REALPART_EXPR
)
458 && (first_stmt_code
!= REALPART_EXPR
459 || rhs_code
!= IMAGPART_EXPR
))
461 if (vect_print_dump_info (REPORT_SLP
))
464 "Build SLP failed: different operation in stmt ");
465 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
472 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
474 if (vect_print_dump_info (REPORT_SLP
))
477 "Build SLP failed: different shift arguments in ");
478 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
485 /* Strided store or load. */
486 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
488 if (REFERENCE_CLASS_P (lhs
))
491 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
492 stmt
, &def_stmts0
, &def_stmts1
,
495 &first_stmt_def0_type
,
496 &first_stmt_def1_type
,
497 &first_stmt_const_oprnd
,
499 &pattern0
, &pattern1
))
505 /* FORNOW: Check that there is no gap between the loads. */
506 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
507 && DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
508 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != stmt
509 && DR_GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
511 if (vect_print_dump_info (REPORT_SLP
))
513 fprintf (vect_dump
, "Build SLP failed: strided "
515 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
521 /* Check that the size of interleaved loads group is not
522 greater than the SLP group size. */
523 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
525 if (vect_print_dump_info (REPORT_SLP
))
527 fprintf (vect_dump
, "Build SLP failed: the number of "
528 "interleaved loads is greater than"
529 " the SLP group size ");
530 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
536 first_load
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
));
539 /* Check that there are no loads from different interleaving
540 chains in the same node. The only exception is complex
542 if (prev_first_load
!= first_load
543 && rhs_code
!= REALPART_EXPR
544 && rhs_code
!= IMAGPART_EXPR
)
546 if (vect_print_dump_info (REPORT_SLP
))
548 fprintf (vect_dump
, "Build SLP failed: different "
549 "interleaving chains in one node ");
550 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
557 prev_first_load
= first_load
;
559 if (first_load
== stmt
)
561 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
562 if (vect_supportable_dr_alignment (first_dr
, false)
563 == dr_unaligned_unsupported
)
565 if (vect_print_dump_info (REPORT_SLP
))
567 fprintf (vect_dump
, "Build SLP failed: unsupported "
569 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
575 /* Analyze costs (for the first stmt in the group). */
576 vect_model_load_cost (vinfo_for_stmt (stmt
),
577 ncopies_for_cost
, *node
);
580 /* Store the place of this load in the interleaving chain. In
581 case that permutation is needed we later decide if a specific
582 permutation is supported. */
583 load_place
= vect_get_place_in_interleaving_chain (stmt
,
588 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
590 /* We stop the tree when we reach a group of loads. */
591 stop_recursion
= true;
594 } /* Strided access. */
597 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
599 /* Not strided load. */
600 if (vect_print_dump_info (REPORT_SLP
))
602 fprintf (vect_dump
, "Build SLP failed: not strided load ");
603 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
606 /* FORNOW: Not strided loads are not supported. */
610 /* Not memory operation. */
611 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
612 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
)
614 if (vect_print_dump_info (REPORT_SLP
))
616 fprintf (vect_dump
, "Build SLP failed: operation");
617 fprintf (vect_dump
, " unsupported ");
618 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
624 /* Find the def-stmts. */
625 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
626 &def_stmts0
, &def_stmts1
,
627 &first_stmt_dt0
, &first_stmt_dt1
,
628 &first_stmt_def0_type
,
629 &first_stmt_def1_type
,
630 &first_stmt_const_oprnd
,
632 &pattern0
, &pattern1
))
637 /* Add the costs of the node to the overall instance costs. */
638 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
639 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
641 /* Strided loads were reached - stop the recursion. */
646 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
648 += targetm
.vectorize
.builtin_vectorization_cost (vec_perm
, NULL
, 0)
653 /* We don't check here complex numbers chains, so we keep them in
654 LOADS for further check in vect_supported_load_permutation_p. */
655 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
656 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
662 /* Create SLP_TREE nodes for the definition node/s. */
663 if (first_stmt_dt0
== vect_internal_def
)
665 slp_tree left_node
= XNEW (struct _slp_tree
);
666 SLP_TREE_SCALAR_STMTS (left_node
) = def_stmts0
;
667 SLP_TREE_VEC_STMTS (left_node
) = NULL
;
668 SLP_TREE_LEFT (left_node
) = NULL
;
669 SLP_TREE_RIGHT (left_node
) = NULL
;
670 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node
) = 0;
671 SLP_TREE_INSIDE_OF_LOOP_COST (left_node
) = 0;
672 if (!vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &left_node
, group_size
,
673 inside_cost
, outside_cost
, ncopies_for_cost
,
674 max_nunits
, load_permutation
, loads
,
675 vectorization_factor
))
678 SLP_TREE_LEFT (*node
) = left_node
;
681 if (first_stmt_dt1
== vect_internal_def
)
683 slp_tree right_node
= XNEW (struct _slp_tree
);
684 SLP_TREE_SCALAR_STMTS (right_node
) = def_stmts1
;
685 SLP_TREE_VEC_STMTS (right_node
) = NULL
;
686 SLP_TREE_LEFT (right_node
) = NULL
;
687 SLP_TREE_RIGHT (right_node
) = NULL
;
688 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node
) = 0;
689 SLP_TREE_INSIDE_OF_LOOP_COST (right_node
) = 0;
690 if (!vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &right_node
, group_size
,
691 inside_cost
, outside_cost
, ncopies_for_cost
,
692 max_nunits
, load_permutation
, loads
,
693 vectorization_factor
))
696 SLP_TREE_RIGHT (*node
) = right_node
;
704 vect_print_slp_tree (slp_tree node
)
712 fprintf (vect_dump
, "node ");
713 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
715 fprintf (vect_dump
, "\n\tstmt %d ", i
);
716 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
718 fprintf (vect_dump
, "\n");
720 vect_print_slp_tree (SLP_TREE_LEFT (node
));
721 vect_print_slp_tree (SLP_TREE_RIGHT (node
));
725 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
726 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
727 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
728 stmts in NODE are to be marked. */
731 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
739 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
741 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
743 vect_mark_slp_stmts (SLP_TREE_LEFT (node
), mark
, j
);
744 vect_mark_slp_stmts (SLP_TREE_RIGHT (node
), mark
, j
);
748 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
751 vect_mark_slp_stmts_relevant (slp_tree node
)
755 stmt_vec_info stmt_info
;
760 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
762 stmt_info
= vinfo_for_stmt (stmt
);
763 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
764 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
765 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
768 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node
));
769 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node
));
773 /* Check if the permutation required by the SLP INSTANCE is supported.
774 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
777 vect_supported_slp_permutation_p (slp_instance instance
)
779 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
780 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
781 gimple first_load
= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
));
782 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
784 slp_tree
*tmp_loads
= NULL
;
785 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
788 /* FORNOW: The only supported loads permutation is loads from the same
789 location in all the loads in the node, when the data-refs in
790 nodes of LOADS constitute an interleaving chain.
791 Sort the nodes according to the order of accesses in the chain. */
792 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
794 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
795 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
796 i
+= group_size
, j
++)
798 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
799 /* Check that the loads are all in the same interleaving chain. */
800 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt
)) != first_load
)
802 if (vect_print_dump_info (REPORT_DETAILS
))
804 fprintf (vect_dump
, "Build SLP failed: unsupported data "
806 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
813 tmp_loads
[index
] = load
;
816 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
817 for (i
= 0; i
< group_size
; i
++)
818 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
820 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
821 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
824 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
825 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
833 /* Rearrange the statements of NODE according to PERMUTATION. */
836 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
837 VEC (int, heap
) *permutation
)
840 VEC (gimple
, heap
) *tmp_stmts
;
841 unsigned int index
, i
;
846 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node
), group_size
, permutation
);
847 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node
), group_size
, permutation
);
849 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
850 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
852 for (i
= 0; i
< group_size
; i
++)
853 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
855 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
857 index
= VEC_index (int, permutation
, i
);
858 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
861 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
862 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
866 /* Check if the required load permutation is supported.
867 LOAD_PERMUTATION contains a list of indices of the loads.
868 In SLP this permutation is relative to the order of strided stores that are
869 the base of the SLP instance. */
872 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
873 VEC (int, heap
) *load_permutation
)
875 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
876 bool supported
, bad_permutation
= false;
878 slp_tree node
, other_complex_node
;
879 gimple stmt
, first
= NULL
, other_node_first
;
880 unsigned complex_numbers
= 0;
882 /* FORNOW: permutations are only supported in SLP. */
886 if (vect_print_dump_info (REPORT_SLP
))
888 fprintf (vect_dump
, "Load permutation ");
889 for (i
= 0; VEC_iterate (int, load_permutation
, i
, next
); i
++)
890 fprintf (vect_dump
, "%d ", next
);
893 /* In case of reduction every load permutation is allowed, since the order
894 of the reduction statements is not important (as opposed to the case of
895 strided stores). The only condition we need to check is that all the
896 load nodes are of the same size and have the same permutation (and then
897 rearrange all the nodes of the SLP instance according to this
900 /* Check that all the load nodes are of the same size. */
902 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
);
905 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
906 != (unsigned) group_size
)
909 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
910 if (is_gimple_assign (stmt
)
911 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
912 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
916 /* Complex operands can be swapped as following:
917 real_c = real_b + real_a;
918 imag_c = imag_a + imag_b;
919 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
920 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
921 chains are mixed, they match the above pattern. */
925 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
);
929 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, stmt
);
936 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) != first
)
938 if (complex_numbers
!= 2)
946 other_complex_node
= VEC_index (slp_tree
,
947 SLP_INSTANCE_LOADS (slp_instn
), k
);
948 other_node_first
= VEC_index (gimple
,
949 SLP_TREE_SCALAR_STMTS (other_complex_node
), 0);
951 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
))
960 /* We checked that this case ok, so there is no need to proceed with
961 permutation tests. */
962 if (complex_numbers
== 2)
964 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (slp_instn
));
965 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
969 node
= SLP_INSTANCE_TREE (slp_instn
);
970 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
971 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
972 instance, not all the loads belong to the same node or interleaving
973 group. Hence, we need to divide them into groups according to
975 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
977 /* Reduction (there are no data-refs in the root). */
978 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
)))
980 int first_group_load_index
;
982 /* Compare all the permutation sequences to the first one. */
983 for (i
= 1; i
< number_of_groups
; i
++)
986 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
988 next
= VEC_index (int, load_permutation
, j
);
989 first_group_load_index
= VEC_index (int, load_permutation
, k
);
991 if (next
!= first_group_load_index
)
993 bad_permutation
= true;
1000 if (bad_permutation
)
1004 if (!bad_permutation
)
1006 /* This permutaion is valid for reduction. Since the order of the
1007 statements in the nodes is not important unless they are memory
1008 accesses, we can rearrange the statements in all the nodes
1009 according to the order of the loads. */
1010 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1012 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1017 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1018 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1019 well (unless it's reduction). */
1020 if (VEC_length (int, load_permutation
)
1021 != (unsigned int) (group_size
* group_size
))
1025 load_index
= sbitmap_alloc (group_size
);
1026 sbitmap_zero (load_index
);
1027 for (j
= 0; j
< group_size
; j
++)
1029 for (i
= j
* group_size
, k
= 0;
1030 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
1033 if (i
!= j
* group_size
&& next
!= prev
)
1042 if (TEST_BIT (load_index
, prev
))
1048 SET_BIT (load_index
, prev
);
1051 for (j
= 0; j
< group_size
; j
++)
1052 if (!TEST_BIT (load_index
, j
))
1055 sbitmap_free (load_index
);
1057 if (supported
&& i
== group_size
* group_size
1058 && vect_supported_slp_permutation_p (slp_instn
))
1065 /* Find the first load in the loop that belongs to INSTANCE.
1066 When loads are in several SLP nodes, there can be a case in which the first
1067 load does not appear in the first SLP node to be transformed, causing
1068 incorrect order of statements. Since we generate all the loads together,
1069 they must be inserted before the first load of the SLP instance and not
1070 before the first load of the first node of the instance. */
1072 vect_find_first_load_in_slp_instance (slp_instance instance
)
1076 gimple first_load
= NULL
, load
;
1079 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
);
1082 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
);
1084 first_load
= get_earlier_stmt (load
, first_load
);
1090 /* Analyze an SLP instance starting from a group of strided stores. Call
1091 vect_build_slp_tree to build a tree of packed stmts if possible.
1092 Return FALSE if it's impossible to SLP any stmt in the loop. */
1095 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1098 slp_instance new_instance
;
1099 slp_tree node
= XNEW (struct _slp_tree
);
1100 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
1101 unsigned int unrolling_factor
= 1, nunits
;
1102 tree vectype
, scalar_type
= NULL_TREE
;
1104 unsigned int vectorization_factor
= 0;
1105 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1106 unsigned int max_nunits
= 0;
1107 VEC (int, heap
) *load_permutation
;
1108 VEC (slp_tree
, heap
) *loads
;
1109 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1113 scalar_type
= TREE_TYPE (DR_REF (dr
));
1114 vectype
= get_vectype_for_scalar_type (scalar_type
);
1115 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (stmt
));
1119 gcc_assert (loop_vinfo
);
1120 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1121 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1126 if (vect_print_dump_info (REPORT_SLP
))
1128 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1129 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1135 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1137 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1139 /* No multitypes in BB SLP. */
1140 vectorization_factor
= nunits
;
1142 /* Calculate the unrolling factor. */
1143 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1144 if (unrolling_factor
!= 1 && !loop_vinfo
)
1146 if (vect_print_dump_info (REPORT_SLP
))
1147 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1153 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1154 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (gimple
, heap
, group_size
);
1158 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1161 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1162 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
1167 /* Collect reduction statements. */
1168 for (i
= 0; VEC_iterate (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
,
1172 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1173 if (vect_print_dump_info (REPORT_DETAILS
))
1175 fprintf (vect_dump
, "pushing reduction into node: ");
1176 print_gimple_stmt (vect_dump
, next
, 0, TDF_SLIM
);
1181 SLP_TREE_VEC_STMTS (node
) = NULL
;
1182 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
1183 SLP_TREE_LEFT (node
) = NULL
;
1184 SLP_TREE_RIGHT (node
) = NULL
;
1185 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
1186 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
1188 /* Calculate the number of vector stmts to create based on the unrolling
1189 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1190 GROUP_SIZE / NUNITS otherwise. */
1191 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1193 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1194 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1196 /* Build the tree for the SLP instance. */
1197 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1198 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1199 &max_nunits
, &load_permutation
, &loads
,
1200 vectorization_factor
))
1202 /* Create a new SLP instance. */
1203 new_instance
= XNEW (struct _slp_instance
);
1204 SLP_INSTANCE_TREE (new_instance
) = node
;
1205 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1206 /* Calculate the unrolling factor based on the smallest type in the
1208 if (max_nunits
> nunits
)
1209 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1212 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1213 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1214 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1215 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1216 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1217 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1218 if (VEC_length (slp_tree
, loads
))
1220 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1223 if (vect_print_dump_info (REPORT_SLP
))
1225 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1227 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1230 vect_free_slp_instance (new_instance
);
1234 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1235 = vect_find_first_load_in_slp_instance (new_instance
);
1238 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1241 VEC_safe_push (slp_instance
, heap
,
1242 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1245 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1248 if (vect_print_dump_info (REPORT_SLP
))
1249 vect_print_slp_tree (node
);
1254 /* Failed to SLP. */
1255 /* Free the allocated memory. */
1256 vect_free_slp_tree (node
);
1257 VEC_free (int, heap
, load_permutation
);
1258 VEC_free (slp_tree
, heap
, loads
);
1264 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
1265 trees of packed scalar stmts if SLP is possible. */
1268 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1271 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
;
1275 if (vect_print_dump_info (REPORT_SLP
))
1276 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1280 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1281 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1284 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1286 /* Find SLP sequences starting from groups of strided stores. */
1287 for (i
= 0; VEC_iterate (gimple
, strided_stores
, i
, store
); i
++)
1288 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, store
))
1291 if (bb_vinfo
&& !ok
)
1293 if (vect_print_dump_info (REPORT_SLP
))
1294 fprintf (vect_dump
, "Failed to SLP the basic block.");
1299 /* Find SLP sequences starting from groups of reductions. */
1300 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1301 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1302 VEC_index (gimple
, reductions
, 0)))
1309 /* For each possible SLP instance decide whether to SLP it and calculate overall
1310 unrolling factor needed to SLP the loop. */
1313 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1315 unsigned int i
, unrolling_factor
= 1;
1316 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1317 slp_instance instance
;
1318 int decided_to_slp
= 0;
1320 if (vect_print_dump_info (REPORT_SLP
))
1321 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1323 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1325 /* FORNOW: SLP if you can. */
1326 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1327 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1329 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1330 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1331 loop-based vectorization. Such stmts will be marked as HYBRID. */
1332 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1336 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1338 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1339 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1340 decided_to_slp
, unrolling_factor
);
1344 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1345 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1348 vect_detect_hybrid_slp_stmts (slp_tree node
)
1352 imm_use_iterator imm_iter
;
1354 stmt_vec_info stmt_vinfo
;
1359 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
1360 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1361 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1362 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1363 if ((stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1364 && !STMT_SLP_TYPE (stmt_vinfo
)
1365 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1366 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1367 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1368 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt
))
1369 == vect_reduction_def
))
1370 vect_mark_slp_stmts (node
, hybrid
, i
);
1372 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node
));
1373 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node
));
1377 /* Find stmts that must be both vectorized and SLPed. */
1380 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1383 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1384 slp_instance instance
;
1386 if (vect_print_dump_info (REPORT_SLP
))
1387 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1389 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1390 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1394 /* Create and initialize a new bb_vec_info struct for BB, as well as
1395 stmt_vec_info structs for all the stmts in it. */
1398 new_bb_vec_info (basic_block bb
)
1400 bb_vec_info res
= NULL
;
1401 gimple_stmt_iterator gsi
;
1403 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1404 BB_VINFO_BB (res
) = bb
;
1406 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1408 gimple stmt
= gsi_stmt (gsi
);
1409 gimple_set_uid (stmt
, 0);
1410 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1413 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1414 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1421 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1422 stmts in the basic block. */
1425 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1428 gimple_stmt_iterator si
;
1433 bb
= BB_VINFO_BB (bb_vinfo
);
1435 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1437 gimple stmt
= gsi_stmt (si
);
1438 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1441 /* Free stmt_vec_info. */
1442 free_stmt_vec_info (stmt
);
1445 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1446 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1452 /* Analyze statements contained in SLP tree node after recursively analyzing
1453 the subtree. Return TRUE if the operations are supported. */
1456 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1465 if (!vect_slp_analyze_node_operations (bb_vinfo
, SLP_TREE_LEFT (node
))
1466 || !vect_slp_analyze_node_operations (bb_vinfo
, SLP_TREE_RIGHT (node
)))
1469 for (i
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
); i
++)
1471 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1472 gcc_assert (stmt_info
);
1473 gcc_assert (PURE_SLP_STMT (stmt_info
));
1475 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1483 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1484 operations are supported. */
1487 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1489 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1490 slp_instance instance
;
1493 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1495 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1496 SLP_INSTANCE_TREE (instance
)))
1498 vect_free_slp_instance (instance
);
1499 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1505 if (!VEC_length (slp_instance
, slp_instances
))
1512 /* Check if the basic block can be vectorized. */
1515 vect_slp_analyze_bb (basic_block bb
)
1517 bb_vec_info bb_vinfo
;
1518 VEC (ddr_p
, heap
) *ddrs
;
1519 VEC (slp_instance
, heap
) *slp_instances
;
1520 slp_instance instance
;
1522 gimple_stmt_iterator gsi
;
1524 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1526 if (vect_print_dump_info (REPORT_DETAILS
))
1527 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
1529 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1531 gimple stmt
= gsi_stmt (gsi
);
1532 if (!is_gimple_debug (stmt
)
1533 && !gimple_nop_p (stmt
)
1534 && gimple_code (stmt
) != GIMPLE_LABEL
)
1538 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
1540 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1541 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
1547 bb_vinfo
= new_bb_vec_info (bb
);
1551 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
1553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1554 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
1557 destroy_bb_vec_info (bb_vinfo
);
1561 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
1562 if (!VEC_length (ddr_p
, ddrs
))
1564 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1565 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
1568 destroy_bb_vec_info (bb_vinfo
);
1572 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
)
1575 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1576 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
1577 "in basic block.\n");
1579 destroy_bb_vec_info (bb_vinfo
);
1583 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
1585 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1586 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
1589 destroy_bb_vec_info (bb_vinfo
);
1593 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
1595 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1596 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
1599 destroy_bb_vec_info (bb_vinfo
);
1603 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
1605 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1606 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
1609 destroy_bb_vec_info (bb_vinfo
);
1613 /* Check the SLP opportunities in the basic block, analyze and build SLP
1615 if (!vect_analyze_slp (NULL
, bb_vinfo
))
1617 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1618 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
1619 "in basic block.\n");
1621 destroy_bb_vec_info (bb_vinfo
);
1625 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1627 /* Mark all the statements that we want to vectorize as pure SLP and
1629 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1631 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1632 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
1635 if (!vect_slp_analyze_operations (bb_vinfo
))
1637 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1638 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
1640 destroy_bb_vec_info (bb_vinfo
);
1644 if (vect_print_dump_info (REPORT_DETAILS
))
1645 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
1651 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1652 the number of created vector stmts depends on the unrolling factor). However,
1653 the actual number of vector stmts for every SLP node depends on VF which is
1654 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1655 In this function we assume that the inside costs calculated in
1656 vect_model_xxx_cost are linear in ncopies. */
1659 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
1661 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1662 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1663 slp_instance instance
;
1665 if (vect_print_dump_info (REPORT_SLP
))
1666 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
1668 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
1669 /* We assume that costs are linear in ncopies. */
1670 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
1671 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1675 /* For constant and loop invariant defs of SLP_NODE this function returns
1676 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1677 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1678 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1679 REDUC_INDEX is the index of the reduction operand in the statements, unless
1683 vect_get_constant_vectors (slp_tree slp_node
, VEC(tree
,heap
) **vec_oprnds
,
1684 unsigned int op_num
, unsigned int number_of_vectors
,
1687 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
1688 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1689 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1693 int j
, number_of_places_left_in_vector
;
1696 int group_size
= VEC_length (gimple
, stmts
);
1697 unsigned int vec_num
, i
;
1698 int number_of_copies
= 1;
1699 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
1700 bool constant_p
, is_store
;
1701 tree neutral_op
= NULL
;
1703 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
)
1705 enum tree_code code
= gimple_assign_rhs_code (stmt
);
1706 if (reduc_index
== -1)
1708 VEC_free (tree
, heap
, *vec_oprnds
);
1712 op_num
= reduc_index
- 1;
1713 op
= gimple_op (stmt
, op_num
+ 1);
1714 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1715 we need either neutral operands or the original operands. See
1716 get_initial_def_for_reduction() for details. */
1719 case WIDEN_SUM_EXPR
:
1725 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
1726 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
1728 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
1733 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
1734 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
1736 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
1741 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
1749 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
1752 op
= gimple_assign_rhs1 (stmt
);
1757 op
= gimple_op (stmt
, op_num
+ 1);
1760 if (CONSTANT_CLASS_P (op
))
1765 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1766 gcc_assert (vector_type
);
1768 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1770 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1771 created vectors. It is greater than 1 if unrolling is performed.
1773 For example, we have two scalar operands, s1 and s2 (e.g., group of
1774 strided accesses of size two), while NUNITS is four (i.e., four scalars
1775 of this type can be packed in a vector). The output vector will contain
1776 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1779 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1780 containing the operands.
1782 For example, NUNITS is four as before, and the group size is 8
1783 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1784 {s5, s6, s7, s8}. */
1786 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
1788 number_of_places_left_in_vector
= nunits
;
1789 for (j
= 0; j
< number_of_copies
; j
++)
1791 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
1794 op
= gimple_assign_rhs1 (stmt
);
1796 op
= gimple_op (stmt
, op_num
+ 1);
1798 if (reduc_index
!= -1)
1800 struct loop
*loop
= (gimple_bb (stmt
))->loop_father
;
1801 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
1804 /* Get the def before the loop. */
1805 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
1806 loop_preheader_edge (loop
));
1807 if (j
!= (number_of_copies
- 1) && neutral_op
)
1811 /* Create 'vect_ = {op0,op1,...,opn}'. */
1812 t
= tree_cons (NULL_TREE
, op
, t
);
1814 number_of_places_left_in_vector
--;
1816 if (number_of_places_left_in_vector
== 0)
1818 number_of_places_left_in_vector
= nunits
;
1821 vec_cst
= build_vector (vector_type
, t
);
1823 vec_cst
= build_constructor_from_list (vector_type
, t
);
1824 VEC_quick_push (tree
, voprnds
,
1825 vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
));
1831 /* Since the vectors are created in the reverse order, we should invert
1833 vec_num
= VEC_length (tree
, voprnds
);
1834 for (j
= vec_num
- 1; j
>= 0; j
--)
1836 vop
= VEC_index (tree
, voprnds
, j
);
1837 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1840 VEC_free (tree
, heap
, voprnds
);
1842 /* In case that VF is greater than the unrolling factor needed for the SLP
1843 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1844 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1845 to replicate the vectors. */
1846 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
1848 tree neutral_vec
= NULL
;
1855 for (i
= 0; i
< (unsigned) nunits
; i
++)
1856 t
= tree_cons (NULL_TREE
, neutral_op
, t
);
1857 neutral_vec
= build_vector (vector_type
, t
);
1860 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
1864 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
1865 VEC_quick_push (tree
, *vec_oprnds
, vop
);
1871 /* Get vectorized definitions from SLP_NODE that contains corresponding
1872 vectorized def-stmts. */
1875 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
1878 gimple vec_def_stmt
;
1881 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
1884 VEC_iterate (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
);
1887 gcc_assert (vec_def_stmt
);
1888 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
1889 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
1894 /* Get vectorized definitions for SLP_NODE.
1895 If the scalar definitions are loop invariants or constants, collect them and
1896 call vect_get_constant_vectors() to create vector stmts.
1897 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1898 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1899 vect_get_slp_vect_defs() to retrieve them.
1900 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1901 the right node. This is used when the second operand must remain scalar. */
1904 vect_get_slp_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds0
,
1905 VEC (tree
,heap
) **vec_oprnds1
, int reduc_index
)
1908 enum tree_code code
;
1909 int number_of_vects
;
1910 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
1912 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
1913 /* The number of vector defs is determined by the number of vector statements
1914 in the node from which we get those statements. */
1915 if (SLP_TREE_LEFT (slp_node
))
1916 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node
));
1919 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1920 /* Number of vector stmts was calculated according to LHS in
1921 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1922 necessary. See vect_get_smallest_scalar_type() for details. */
1923 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
1925 if (rhs_size_unit
!= lhs_size_unit
)
1927 number_of_vects
*= rhs_size_unit
;
1928 number_of_vects
/= lhs_size_unit
;
1932 /* Allocate memory for vectorized defs. */
1933 *vec_oprnds0
= VEC_alloc (tree
, heap
, number_of_vects
);
1935 /* SLP_NODE corresponds either to a group of stores or to a group of
1936 unary/binary operations. We don't call this function for loads.
1937 For reduction defs we call vect_get_constant_vectors(), since we are
1938 looking for initial loop invariant values. */
1939 if (SLP_TREE_LEFT (slp_node
) && reduc_index
== -1)
1940 /* The defs are already vectorized. */
1941 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node
), vec_oprnds0
);
1943 /* Build vectors from scalar defs. */
1944 vect_get_constant_vectors (slp_node
, vec_oprnds0
, 0, number_of_vects
,
1947 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
)))
1948 /* Since we don't call this function with loads, this is a group of
1952 /* For reductions, we only need initial values. */
1953 if (reduc_index
!= -1)
1956 code
= gimple_assign_rhs_code (first_stmt
);
1957 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
|| !vec_oprnds1
)
1960 /* The number of vector defs is determined by the number of vector statements
1961 in the node from which we get those statements. */
1962 if (SLP_TREE_RIGHT (slp_node
))
1963 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node
));
1965 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1967 *vec_oprnds1
= VEC_alloc (tree
, heap
, number_of_vects
);
1969 if (SLP_TREE_RIGHT (slp_node
))
1970 /* The defs are already vectorized. */
1971 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node
), vec_oprnds1
);
1973 /* Build vectors from scalar defs. */
1974 vect_get_constant_vectors (slp_node
, vec_oprnds1
, 1, number_of_vects
, -1);
1978 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1979 building a vector of type MASK_TYPE from it) and two input vectors placed in
1980 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1981 shifting by STRIDE elements of DR_CHAIN for every copy.
1982 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1984 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1985 the created stmts must be inserted. */
1988 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
1989 tree mask
, int first_vec_indx
, int second_vec_indx
,
1990 gimple_stmt_iterator
*gsi
, slp_tree node
,
1991 tree builtin_decl
, tree vectype
,
1992 VEC(tree
,heap
) *dr_chain
,
1993 int ncopies
, int vect_stmts_counter
)
1996 gimple perm_stmt
= NULL
;
1997 stmt_vec_info next_stmt_info
;
1999 tree first_vec
, second_vec
, data_ref
;
2001 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2003 /* Initialize the vect stmts of NODE to properly insert the generated
2005 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
2006 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2007 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
2009 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2010 for (i
= 0; i
< ncopies
; i
++)
2012 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
2013 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
2015 /* Generate the permute statement. */
2016 perm_stmt
= gimple_build_call (builtin_decl
,
2017 3, first_vec
, second_vec
, mask
);
2018 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2019 gimple_call_set_lhs (perm_stmt
, data_ref
);
2020 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2022 /* Store the vector statement in NODE. */
2023 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
2024 stride
* i
+ vect_stmts_counter
, perm_stmt
);
2026 first_vec_indx
+= stride
;
2027 second_vec_indx
+= stride
;
2030 /* Mark the scalar stmt as vectorized. */
2031 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2032 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2036 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2037 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2038 representation. Check that the mask is valid and return FALSE if not.
2039 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2040 the next vector, i.e., the current first vector is not needed. */
2043 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2044 int mask_nunits
, bool only_one_vec
, int index
,
2045 int *mask
, int *current_mask_element
,
2046 bool *need_next_vector
)
2049 static int number_of_mask_fixes
= 1;
2050 static bool mask_fixed
= false;
2051 static bool needs_first_vector
= false;
2053 /* Convert to target specific representation. */
2054 *current_mask_element
= first_mask_element
+ m
;
2055 /* Adjust the value in case it's a mask for second and third vectors. */
2056 *current_mask_element
-= mask_nunits
* (number_of_mask_fixes
- 1);
2058 if (*current_mask_element
< mask_nunits
)
2059 needs_first_vector
= true;
2061 /* We have only one input vector to permute but the mask accesses values in
2062 the next vector as well. */
2063 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2065 if (vect_print_dump_info (REPORT_DETAILS
))
2067 fprintf (vect_dump
, "permutation requires at least two vectors ");
2068 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2074 /* The mask requires the next vector. */
2075 if (*current_mask_element
>= mask_nunits
* 2)
2077 if (needs_first_vector
|| mask_fixed
)
2079 /* We either need the first vector too or have already moved to the
2080 next vector. In both cases, this permutation needs three
2082 if (vect_print_dump_info (REPORT_DETAILS
))
2084 fprintf (vect_dump
, "permutation requires at "
2085 "least three vectors ");
2086 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2092 /* We move to the next vector, dropping the first one and working with
2093 the second and the third - we need to adjust the values of the mask
2095 *current_mask_element
-= mask_nunits
* number_of_mask_fixes
;
2097 for (i
= 0; i
< index
; i
++)
2098 mask
[i
] -= mask_nunits
* number_of_mask_fixes
;
2100 (number_of_mask_fixes
)++;
2104 *need_next_vector
= mask_fixed
;
2106 /* This was the last element of this mask. Start a new one. */
2107 if (index
== mask_nunits
- 1)
2109 number_of_mask_fixes
= 1;
2111 needs_first_vector
= false;
2118 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2119 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2120 permute statements for SLP_NODE_INSTANCE. */
2122 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
2123 gimple_stmt_iterator
*gsi
, int vf
,
2124 slp_instance slp_node_instance
, bool analyze_only
)
2126 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2127 tree mask_element_type
= NULL_TREE
, mask_type
;
2128 int i
, j
, k
, m
, scale
, mask_nunits
, nunits
, vec_index
= 0, scalar_index
;
2130 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), builtin_decl
;
2131 gimple next_scalar_stmt
;
2132 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2133 int first_mask_element
;
2134 int index
, unroll_factor
, *mask
, current_mask_element
, ncopies
;
2135 bool only_one_vec
= false, need_next_vector
= false;
2136 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2138 if (!targetm
.vectorize
.builtin_vec_perm
)
2140 if (vect_print_dump_info (REPORT_DETAILS
))
2142 fprintf (vect_dump
, "no builtin for vect permute for ");
2143 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2149 builtin_decl
= targetm
.vectorize
.builtin_vec_perm (vectype
,
2150 &mask_element_type
);
2151 if (!builtin_decl
|| !mask_element_type
)
2153 if (vect_print_dump_info (REPORT_DETAILS
))
2155 fprintf (vect_dump
, "no builtin for vect permute for ");
2156 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2162 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2163 mask_nunits
= TYPE_VECTOR_SUBPARTS (mask_type
);
2164 mask
= (int *) xmalloc (sizeof (int) * mask_nunits
);
2165 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2166 scale
= mask_nunits
/ nunits
;
2167 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2169 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2170 unrolling factor. */
2171 orig_vec_stmts_num
= group_size
*
2172 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2173 if (orig_vec_stmts_num
== 1)
2174 only_one_vec
= true;
2176 /* Number of copies is determined by the final vectorization factor
2177 relatively to SLP_NODE_INSTANCE unrolling factor. */
2178 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2180 /* Generate permutation masks for every NODE. Number of masks for each NODE
2181 is equal to GROUP_SIZE.
2182 E.g., we have a group of three nodes with three loads from the same
2183 location in each node, and the vector size is 4. I.e., we have a
2184 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2185 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2186 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2189 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2190 specific type, e.g., in bytes for Altivec).
2191 The last mask is illegal since we assume two operands for permute
2192 operation, and the mask element values can't be outside that range. Hence,
2193 the last mask must be converted into {2,5,5,5}.
2194 For the first two permutations we need the first and the second input
2195 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2196 we need the second and the third vectors: {b1,c1,a2,b2} and
2200 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
),
2206 vect_stmts_counter
= 0;
2208 first_vec_index
= vec_index
++;
2210 second_vec_index
= first_vec_index
;
2212 second_vec_index
= vec_index
++;
2214 for (j
= 0; j
< unroll_factor
; j
++)
2216 for (k
= 0; k
< group_size
; k
++)
2218 first_mask_element
= (i
+ j
* group_size
) * scale
;
2219 for (m
= 0; m
< scale
; m
++)
2221 if (!vect_get_mask_element (stmt
, first_mask_element
, m
,
2222 mask_nunits
, only_one_vec
, index
, mask
,
2223 ¤t_mask_element
, &need_next_vector
))
2226 mask
[index
++] = current_mask_element
;
2229 if (index
== mask_nunits
)
2231 tree mask_vec
= NULL
;
2233 while (--index
>= 0)
2235 tree t
= build_int_cst (mask_element_type
, mask
[index
]);
2236 mask_vec
= tree_cons (NULL
, t
, mask_vec
);
2238 mask_vec
= build_vector (mask_type
, mask_vec
);
2241 if (!targetm
.vectorize
.builtin_vec_perm_ok (vectype
,
2244 if (vect_print_dump_info (REPORT_DETAILS
))
2246 fprintf (vect_dump
, "unsupported vect permute ");
2247 print_generic_expr (vect_dump
, mask_vec
, 0);
2255 if (need_next_vector
)
2257 first_vec_index
= second_vec_index
;
2258 second_vec_index
= vec_index
;
2261 next_scalar_stmt
= VEC_index (gimple
,
2262 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
2264 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2265 mask_vec
, first_vec_index
, second_vec_index
,
2266 gsi
, node
, builtin_decl
, vectype
, dr_chain
,
2267 ncopies
, vect_stmts_counter
++);
2280 /* Vectorize SLP instance tree in postorder. */
2283 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2284 unsigned int vectorization_factor
)
2287 bool strided_store
, is_store
;
2288 gimple_stmt_iterator si
;
2289 stmt_vec_info stmt_info
;
2290 unsigned int vec_stmts_size
, nunits
, group_size
;
2293 slp_tree loads_node
;
2298 vect_schedule_slp_instance (SLP_TREE_LEFT (node
), instance
,
2299 vectorization_factor
);
2300 vect_schedule_slp_instance (SLP_TREE_RIGHT (node
), instance
,
2301 vectorization_factor
);
2303 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
2304 stmt_info
= vinfo_for_stmt (stmt
);
2306 /* VECTYPE is the type of the destination. */
2307 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2308 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
2309 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2311 /* For each SLP instance calculate number of vector stmts to be created
2312 for the scalar stmts in each node of the SLP tree. Number of vector
2313 elements in one vector iteration is the number of scalar elements in
2314 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2316 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
2318 /* In case of load permutation we have to allocate vectorized statements for
2319 all the nodes that participate in that permutation. */
2320 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2323 VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
);
2326 if (!SLP_TREE_VEC_STMTS (loads_node
))
2328 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
2330 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
2335 if (!SLP_TREE_VEC_STMTS (node
))
2337 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
2338 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
2341 if (vect_print_dump_info (REPORT_DETAILS
))
2343 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
2344 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2347 /* Loads should be inserted before the first load. */
2348 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
2349 && STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2350 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
2351 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
2353 si
= gsi_for_stmt (stmt
);
2355 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
2361 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2363 VEC (slp_instance
, heap
) *slp_instances
;
2364 slp_instance instance
;
2366 bool is_store
= false;
2370 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2371 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2375 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2379 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
2381 /* Schedule the tree of INSTANCE. */
2382 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
2384 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
)
2385 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2386 fprintf (vect_dump
, "vectorizing stmts using SLP.");
2389 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); i
++)
2391 slp_tree root
= SLP_INSTANCE_TREE (instance
);
2394 gimple_stmt_iterator gsi
;
2396 for (j
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (root
), j
, store
)
2397 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
2399 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
2402 /* Free the attached stmt_vec_info and remove the stmt. */
2403 gsi
= gsi_for_stmt (store
);
2404 gsi_remove (&gsi
, true);
2405 free_stmt_vec_info (store
);
2413 /* Vectorize the basic block. */
2416 vect_slp_transform_bb (basic_block bb
)
2418 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
2419 gimple_stmt_iterator si
;
2421 gcc_assert (bb_vinfo
);
2423 if (vect_print_dump_info (REPORT_DETAILS
))
2424 fprintf (vect_dump
, "SLPing BB\n");
2426 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2428 gimple stmt
= gsi_stmt (si
);
2429 stmt_vec_info stmt_info
;
2431 if (vect_print_dump_info (REPORT_DETAILS
))
2433 fprintf (vect_dump
, "------>SLPing statement: ");
2434 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2437 stmt_info
= vinfo_for_stmt (stmt
);
2438 gcc_assert (stmt_info
);
2440 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2441 if (STMT_SLP_TYPE (stmt_info
))
2443 vect_schedule_slp (NULL
, bb_vinfo
);
2448 mark_sym_for_renaming (gimple_vop (cfun
));
2449 /* The memory tags and pointers in vectorized statements need to
2450 have their SSA forms updated. FIXME, why can't this be delayed
2451 until all the loops have been transformed? */
2452 update_ssa (TODO_update_ssa
);
2454 if (vect_print_dump_info (REPORT_DETAILS
))
2455 fprintf (vect_dump
, "BASIC BLOCK VECTORIZED\n");
2457 destroy_bb_vec_info (bb_vinfo
);