/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds, and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  free (instance);
}
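
/* An SLP tree in this file is binary: each node packs GROUP_SIZE scalar
   stmts, and SLP_TREE_LEFT/SLP_TREE_RIGHT pack the definitions of their
   first and second operands.  For example, for the group

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   the root node packs the two stores, the left child packs the defs of
   b[0]/b[1], and the right child packs the defs of c[0]/c[1].  */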
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not memory operation (we don't call this function for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                   dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  Allow different definition
                 types for reduction chains: the first stmt must be a
                 vect_reduction_def (a phi node), and the rest
                 vect_internal_def.  */
              if ((i == 0
                   && ((*first_stmt_dt0 != dt[i]
                        && !(*first_stmt_dt0 == vect_reduction_def
                             && dt[i] == vect_internal_def))
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && ((*first_stmt_dt1 != dt[i]
                           && !(*first_stmt_dt1 == vect_reduction_def
                                && dt[i] == vect_internal_def))
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}
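
/* For example, the group

     a[0] = b[0] + c[0];
     a[1] = b[1] * c[1];

   fails the isomorphism check below (PLUS vs. MULT), while two additions
   over adjacent memory are accepted and packed into a single tree, one
   child node per operand.  */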
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }

                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
                      return false;
                    }

                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                              "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Not strided loads are not supported.  */
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we keep them in
             LOADS for further check in vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }
  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
/* Dump the SLP tree rooted at NODE to the vectorizer dump file.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                  "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
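
/* E.g., with GROUP_SIZE 2 and PERMUTATION {1, 0} the scalar stmts {s1, s2}
   of every node become {s2, s1}: stmt I is moved to index PERMUTATION[I].  */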
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                           SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                               SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
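
/* E.g., for GROUP_SIZE 2 the only shape accepted above is the load
   permutation {0, 0, 1, 1}: each load node reads a single location of the
   interleaving chain GROUP_SIZE times.  A permutation like {0, 1, 1, 0}
   mixes locations within a node and is rejected.  */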
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}
/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                            " block SLP");

      return false;
    }
1219 SLP_TREE_SCALAR_STMTS (node
) = VEC_alloc (gimple
, heap
, group_size
);
1221 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1223 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1226 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1227 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1232 /* Collect reduction statements. */
1233 for (i
= 0; VEC_iterate (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
), i
,
1236 VEC_safe_push (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
), next
);
1239 SLP_TREE_VEC_STMTS (node
) = NULL
;
1240 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
1241 SLP_TREE_LEFT (node
) = NULL
;
1242 SLP_TREE_RIGHT (node
) = NULL
;
1243 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
1244 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
1246 /* Calculate the number of vector stmts to create based on the unrolling
1247 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1248 GROUP_SIZE / NUNITS otherwise. */
1249 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
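
  /* E.g., UNROLLING_FACTOR 1, GROUP_SIZE 8 and NUNITS 4 give
     NCOPIES_FOR_COST 2: two vector stmts cover the eight scalars.  */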
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                                      "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}
/* Check if loads and stores are mixed in the basic block (in that
   case if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE if there is a statement marked as
   not vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow not analyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
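
/* E.g., with vec_inside_cost 4, vec_outside_cost 0 and scalar_cost 8 the
   block is vectorized (4 < 8); equal costs count as unprofitable.  */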
/* Check if the basic block can be vectorized.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  current_vector_size = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
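
/* E.g., an instance with unrolling factor 2 inside a loop vectorized with
   VF 8 creates 8/2 = 4 times more vector stmts than assumed when the cost
   was computed, so its inside cost is scaled by 4.  */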
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}
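
/* E.g., for a reduction group {s1, s2} with NUNITS 4 and PLUS_EXPR, the
   routine above combines the two initial values with the neutral element
   of the operation (0 for addition, 1 for multiplication) to fill the
   remaining vector slots and any extra copies.  */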
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain scalar.  */

void
vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
                   VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
                               reduc_index);
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */
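/* E.g., with 4 vector stmts in NODE and NCOPIES == 2, STRIDE == 2: copy 0
   reads DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX], copy 1 reads
   the vectors 2 positions further down DR_CHAIN, and the generated stmts are
   stored at indexes VECT_STMTS_COUNTER and VECT_STMTS_COUNTER + 2.  */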
static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */
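/* E.g., with MASK_NUNITS == 4, a mask element of 5 selects element 1 of the
   second input vector; an element of 9 would need a third vector and makes
   the mask invalid, unless the first vector is no longer needed, in which
   case the element becomes 5 and all elements recorded so far are shifted
   down by 4.  */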
static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */
bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
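  /* E.g., for V4SI loads on a target whose permute mask is a vector of bytes
     (as on Altivec), MASK_NUNITS == 16, NUNITS == 4 and SCALE == 4: each
     mask element in element representation expands to 4 byte indexes.  */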
  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relative to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
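  /* E.g., if the SLP instance needs an unrolling factor of 2 but the loop is
     vectorized with VF == 8, NCOPIES == 4 and every permutation is emitted
     four times, each copy reading vectors STRIDE elements further down the
     load chain (see vect_create_mask_and_perm).  */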
  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                   mask_nunits, only_one_vec, index, mask,
                                   &current_mask_element, &need_next_vector,
                                   &number_of_mask_fixes, &mask_fixed,
                                   &needs_first_vector))
                    return false;

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, builtin_decl, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
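  /* E.g., a group of 2 stores of int vectorized with VF == 4 for a V4SI
     vector type: (4 * 2) / 4 == 2 vector stmts are created per SLP node.  */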
  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}
/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}