/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it is found, and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  free (instance);
}

/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
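
/* For example, for a group of isomorphic statements

     a0 = b0 + c0;
     a1 = b1 + c1;

   the definitions of B0 and B1 are collected in DEF_STMTS0 and those of
   C0 and C1 in DEF_STMTS1, and each definition must match the definition
   type and operand type recorded for the first statement of the group.  */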

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                   dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  Allow different definition
                 types for reduction chains: the first stmt must be a
                 vect_reduction_def (a phi node), and the rest
                 vect_internal_def.  */
              if ((i == 0
                   && ((*first_stmt_dt0 != dt[i]
                        && !(*first_stmt_dt0 == vect_reduction_def
                             && dt[i] == vect_internal_def))
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && ((*first_stmt_dt1 != dt[i]
                           && !(*first_stmt_dt1 == vect_reduction_def
                                && dt[i] == vect_internal_def))
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
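
/* For example, the statement group

     a[i]   = b[i]   + c[i];
     a[i+1] = b[i+1] + c[i+1];

   is isomorphic (same operation, compatible types), so one SLP node can
   hold both statements, with child nodes built recursively for the B
   and C operands.  Mixing different operations in one group makes the
   tree building fail.  */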

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }

                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }

                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we keep them in
             LOADS for further check in vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}

/* Recursively print the SLP tree rooted at NODE.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
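
  /* For example, with GROUP_SIZE 2 and load permutation {1, 1, 0, 0},
     the first node in LOADS reads element 1 of the interleaving chain
     and the second node reads element 0, so the two nodes are swapped
     here to match the order of accesses in the chain.  */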
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
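
/* For example, for the store group

     a[i]   = x + b[2*i+1];
     a[i+1] = y + b[2*i];

   the loads of B form one interleaving chain; the first store uses
   chain element 1 and the second store uses chain element 0, so the
   recorded load permutation is {1, 0}.  */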

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
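
  /* For example, for GROUP_SIZE 2 the permutation {0, 0, 1, 1} is
     supported (each node loads a single element of the chain), whereas
     {0, 1, 1, 0} is not.  */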
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      supported = false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                     " block SLP");

          vect_free_slp_tree (node);
          VEC_free (int, heap, load_permutation);
          VEC_free (slp_tree, heap, loads);
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
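
/* For example, if a PURE_SLP statement in an SLP instance defines an SSA
   name that is also used by a relevant statement outside the instance
   (one that will be loop-vectorized), the statement must be vectorized
   both ways, so it is demoted to HYBRID.  */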

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if loads and stores are mixed in the basic block (in that
   case if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE if there is a statement marked as not
   vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow unanalyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}

/* Check if vectorization of the basic block is profitable.  */
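
/* For example, if the SLP instances of the block have a total inside
   cost of 4 and outside cost of 2, while the scalar statements they
   replace cost 8, then 4 + 2 < 8 and vectorization is considered
   profitable.  */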

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
    }
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
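
/* For example, an instance whose inside cost was computed for an
   unrolling factor of 2 has that cost doubled if the vectorization
   factor turns out to be 4, since twice as many vector stmts are
   created for each of its nodes.  */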

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  gimple def_stmt;
  struct loop *loop;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MIN_EXPR:
          case MAX_EXPR:
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}

/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}

/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain scalar.  */
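
/* For example, for a node of statements of the form c = a + b, the
   vectorized defs for the A operands come from the LEFT child node (or
   are built from constants/invariants) into VEC_OPRNDS0, and the
   vectorized defs for the B operands come from the RIGHT child node
   into VEC_OPRNDS1.  */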
2140 vect_get_slp_defs (tree op0
, tree op1
, slp_tree slp_node
,
2141 VEC (tree
,heap
) **vec_oprnds0
,
2142 VEC (tree
,heap
) **vec_oprnds1
, int reduc_index
)
2145 enum tree_code code
;
2146 int number_of_vects
;
2147 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2149 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
2150 /* The number of vector defs is determined by the number of vector statements
2151 in the node from which we get those statements. */
2152 if (SLP_TREE_LEFT (slp_node
))
2153 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node
));
2156 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2157 /* Number of vector stmts was calculated according to LHS in
2158 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
2159 necessary. See vect_get_smallest_scalar_type () for details. */
2160 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2162 if (rhs_size_unit
!= lhs_size_unit
)
2164 number_of_vects
*= rhs_size_unit
;
2165 number_of_vects
/= lhs_size_unit
;
2169 /* Allocate memory for vectorized defs. */
2170 *vec_oprnds0
= VEC_alloc (tree
, heap
, number_of_vects
);
2172 /* SLP_NODE corresponds either to a group of stores or to a group of
2173 unary/binary operations. We don't call this function for loads.
2174 For reduction defs we call vect_get_constant_vectors(), since we are
2175 looking for initial loop invariant values. */
2176 if (SLP_TREE_LEFT (slp_node
) && reduc_index
== -1)
2177 /* The defs are already vectorized. */
2178 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node
), vec_oprnds0
);
2180 /* Build vectors from scalar defs. */
2181 vect_get_constant_vectors (op0
, slp_node
, vec_oprnds0
, 0, number_of_vects
,
2184 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
)))
2185 /* Since we don't call this function with loads, this is a group of
2189 /* For reductions, we only need initial values. */
2190 if (reduc_index
!= -1)
2193 code
= gimple_assign_rhs_code (first_stmt
);
2194 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
|| !vec_oprnds1
|| !op1
)
2197 /* The number of vector defs is determined by the number of vector statements
2198 in the node from which we get those statements. */
2199 if (SLP_TREE_RIGHT (slp_node
))
2200 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node
));
2202 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2204 *vec_oprnds1
= VEC_alloc (tree
, heap
, number_of_vects
);
2206 if (SLP_TREE_RIGHT (slp_node
))
2207 /* The defs are already vectorized. */
2208 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node
), vec_oprnds1
);
2210 /* Build vectors from scalar defs. */
2211 vect_get_constant_vectors (op1
, slp_node
, vec_oprnds1
, 1, number_of_vects
,
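/* A worked example of the LHS/RHS fix-up above, using hypothetical types:
   for a group of stmts like  short_x = (short) int_y,  the vector stmt count
   was computed for the 2-byte LHS (one V8HI per stmt), but the defs are
   built from the 4-byte RHS, and each V8HI result consumes two V4SI operand
   vectors, so NUMBER_OF_VECTS is scaled by 4/2 = 2.  */
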
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed
   in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE,
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}

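/* To illustrate the slot assignment above (made-up numbers): with
   SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 4 and NCOPIES == 2, STRIDE is 2,
   so the two copies created for VECT_STMTS_COUNTER == 0 occupy slots 0 and 2
   of SLP_TREE_VEC_STMTS (node), leaving slots 1 and 3 for the permutation
   generated with VECT_STMTS_COUNTER == 1.  */
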
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}

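/* A small worked example of the adjustment above (hypothetical values): with
   MASK_NUNITS == 4 and NUMBER_OF_MASK_FIXES == 1, a requested element 9
   satisfies 9 >= 2 * 4, so, provided the first vector is not needed, it is
   rewritten relative to the second and third input vectors as 9 - 4 * 1 = 5,
   and all earlier entries of MASK are shifted down by 4 in the same way.  */
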
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size
                       * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
                       / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                   mask_nunits, only_one_vec, index, mask,
                                   &current_mask_element, &need_next_vector,
                                   &number_of_mask_fixes, &mask_fixed,
                                   &needs_first_vector))
                    {
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, builtin_decl, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}

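/* A note on SCALE in the function above (illustrative): if the data vectors
   are V4SI but the target's permute mask is V16QI, as on Altivec-like
   targets, SCALE is 16 / 4 == 4, and the loop over M expands every
   element-sized index into 4 consecutive byte indices of the mask.  */
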
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}

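/* For example (hypothetical figures): with GROUP_SIZE == 4, a vectorization
   factor of 4 and NUNITS == 4, each node of the instance gets
   (4 * 4) / 4 == 4 vector stmts, i.e., one vector stmt per scalar stmt of
   the group for every vectorized iteration.  */
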
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}

/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}