/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
45 find_bb_location (basic_block bb
)
48 gimple_stmt_iterator si
;
53 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
56 if (gimple_location (stmt
) != UNKNOWN_LOC
)
57 return gimple_location (stmt
);
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 vect_free_slp_tree (slp_tree node
)
75 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
76 vect_free_slp_tree ((slp_tree
) child
);
78 SLP_TREE_CHILDREN (node
).release ();
79 SLP_TREE_SCALAR_STMTS (node
).release ();
80 SLP_TREE_VEC_STMTS (node
).release ();
86 /* Free the memory allocated for the SLP instance. */
89 vect_free_slp_instance (slp_instance instance
)
91 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
92 SLP_INSTANCE_LOAD_PERMUTATION (instance
).release ();
93 SLP_INSTANCE_LOADS (instance
).release ();
94 SLP_INSTANCE_BODY_COST_VEC (instance
).release ();
99 /* Create an SLP node for SCALAR_STMTS. */
102 vect_create_new_slp_node (vec
<gimple
> scalar_stmts
)
105 gimple stmt
= scalar_stmts
[0];
108 if (is_gimple_call (stmt
))
109 nops
= gimple_call_num_args (stmt
);
110 else if (is_gimple_assign (stmt
))
112 nops
= gimple_num_ops (stmt
) - 1;
113 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
119 node
= XNEW (struct _slp_tree
);
120 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
121 SLP_TREE_VEC_STMTS (node
).create (0);
122 SLP_TREE_CHILDREN (node
).create (nops
);
128 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
130 static vec
<slp_oprnd_info
>
131 vect_create_oprnd_info (int nops
, int group_size
)
134 slp_oprnd_info oprnd_info
;
135 vec
<slp_oprnd_info
> oprnds_info
;
137 oprnds_info
.create (nops
);
138 for (i
= 0; i
< nops
; i
++)
140 oprnd_info
= XNEW (struct _slp_oprnd_info
);
141 oprnd_info
->def_stmts
.create (group_size
);
142 oprnd_info
->first_dt
= vect_uninitialized_def
;
143 oprnd_info
->first_def_type
= NULL_TREE
;
144 oprnd_info
->first_const_oprnd
= NULL_TREE
;
145 oprnd_info
->first_pattern
= false;
146 oprnds_info
.quick_push (oprnd_info
);
153 /* Free operands info. */
156 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
159 slp_oprnd_info oprnd_info
;
161 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
163 oprnd_info
->def_stmts
.release ();
164 XDELETE (oprnd_info
);
167 oprnds_info
.release ();
171 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
172 they are of a valid type and that they match the defs of the first stmt of
173 the SLP group (stored in OPRNDS_INFO). */
176 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
177 slp_tree slp_node
, gimple stmt
,
178 int ncopies_for_cost
, bool first
,
179 vec
<slp_oprnd_info
> *oprnds_info
,
180 stmt_vector_for_cost
*prologue_cost_vec
,
181 stmt_vector_for_cost
*body_cost_vec
)
184 unsigned int i
, number_of_oprnds
;
185 tree def
, def_op0
= NULL_TREE
;
187 enum vect_def_type dt
= vect_uninitialized_def
;
188 enum vect_def_type dt_op0
= vect_uninitialized_def
;
189 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
190 tree lhs
= gimple_get_lhs (stmt
);
191 struct loop
*loop
= NULL
;
192 enum tree_code rhs_code
;
193 bool different_types
= false;
194 bool pattern
= false;
195 slp_oprnd_info oprnd_info
, oprnd0_info
, oprnd1_info
;
197 tree compare_rhs
= NULL_TREE
;
200 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
202 if (is_gimple_call (stmt
))
204 number_of_oprnds
= gimple_call_num_args (stmt
);
207 else if (is_gimple_assign (stmt
))
209 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
210 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
216 for (i
= 0; i
< number_of_oprnds
; i
++)
221 compare_rhs
= NULL_TREE
;
224 oprnd
= gimple_op (stmt
, op_idx
++);
226 oprnd_info
= (*oprnds_info
)[i
];
228 if (COMPARISON_CLASS_P (oprnd
))
230 compare_rhs
= TREE_OPERAND (oprnd
, 1);
231 oprnd
= TREE_OPERAND (oprnd
, 0);
234 if (!vect_is_simple_use (oprnd
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
236 || (!def_stmt
&& dt
!= vect_constant_def
))
238 if (dump_enabled_p ())
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
241 "Build SLP failed: can't find def for ");
242 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
248 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
249 from the pattern. Check that all the stmts of the node are in the
251 if (def_stmt
&& gimple_bb (def_stmt
)
252 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
253 || (!loop
&& gimple_bb (def_stmt
) == BB_VINFO_BB (bb_vinfo
)
254 && gimple_code (def_stmt
) != GIMPLE_PHI
))
255 && vinfo_for_stmt (def_stmt
)
256 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
257 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
258 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
261 if (!first
&& !oprnd_info
->first_pattern
)
263 if (dump_enabled_p ())
265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
266 "Build SLP failed: some of the stmts"
267 " are in a pattern, and others are not ");
268 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
274 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
275 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
277 if (dt
== vect_unknown_def_type
)
279 if (dump_enabled_p ())
280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
281 "Unsupported pattern.");
285 switch (gimple_code (def_stmt
))
288 def
= gimple_phi_result (def_stmt
);
292 def
= gimple_assign_lhs (def_stmt
);
296 if (dump_enabled_p ())
297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
298 "unsupported defining stmt: ");
305 oprnd_info
->first_dt
= dt
;
306 oprnd_info
->first_pattern
= pattern
;
309 oprnd_info
->first_def_type
= TREE_TYPE (def
);
310 oprnd_info
->first_const_oprnd
= NULL_TREE
;
314 oprnd_info
->first_def_type
= NULL_TREE
;
315 oprnd_info
->first_const_oprnd
= oprnd
;
322 /* Analyze costs (for the first stmt of the group only). */
323 if (REFERENCE_CLASS_P (lhs
))
325 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
326 dt
, slp_node
, prologue_cost_vec
,
330 enum vect_def_type dts
[2];
332 dts
[1] = vect_uninitialized_def
;
333 /* Not memory operation (we don't call this function for
335 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dts
,
336 prologue_cost_vec
, body_cost_vec
);
342 /* Not first stmt of the group, check that the def-stmt/s match
343 the def-stmt/s of the first stmt. Allow different definition
344 types for reduction chains: the first stmt must be a
345 vect_reduction_def (a phi node), and the rest
346 vect_internal_def. */
347 if (((oprnd_info
->first_dt
!= dt
348 && !(oprnd_info
->first_dt
== vect_reduction_def
349 && dt
== vect_internal_def
))
350 || (oprnd_info
->first_def_type
!= NULL_TREE
352 && !types_compatible_p (oprnd_info
->first_def_type
,
355 && !types_compatible_p (TREE_TYPE (oprnd_info
->first_const_oprnd
),
359 if (number_of_oprnds
!= 2)
361 if (dump_enabled_p ())
362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
363 "Build SLP failed: different types ");
368 /* Try to swap operands in case of binary operation. */
370 different_types
= true;
373 oprnd0_info
= (*oprnds_info
)[0];
374 if (is_gimple_assign (stmt
)
375 && (rhs_code
= gimple_assign_rhs_code (stmt
))
376 && TREE_CODE_CLASS (rhs_code
) == tcc_binary
377 && commutative_tree_code (rhs_code
)
378 && oprnd0_info
->first_dt
== dt
379 && oprnd_info
->first_dt
== dt_op0
381 && !(oprnd0_info
->first_def_type
382 && !types_compatible_p (oprnd0_info
->first_def_type
,
384 && !(oprnd_info
->first_def_type
385 && !types_compatible_p (oprnd_info
->first_def_type
,
386 TREE_TYPE (def_op0
))))
388 if (dump_enabled_p ())
390 dump_printf_loc (MSG_NOTE
, vect_location
,
391 "Swapping operands of ");
392 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
395 swap_tree_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
396 gimple_assign_rhs2_ptr (stmt
));
400 if (dump_enabled_p ())
401 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
402 "Build SLP failed: different types ");
410 /* Check the types of the definitions. */
413 case vect_constant_def
:
414 case vect_external_def
:
415 case vect_reduction_def
:
418 case vect_internal_def
:
421 oprnd0_info
= (*oprnds_info
)[0];
422 oprnd1_info
= (*oprnds_info
)[0];
424 oprnd1_info
->def_stmts
.quick_push (def_stmt
);
426 oprnd0_info
->def_stmts
.quick_push (def_stmt
);
429 oprnd_info
->def_stmts
.quick_push (def_stmt
);
434 /* FORNOW: Not supported. */
435 if (dump_enabled_p ())
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
438 "Build SLP failed: illegal type of def ");
439 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, def
);
450 /* Recursively build an SLP tree starting from NODE.
451 Fail (and return FALSE) if def-stmts are not isomorphic, require data
452 permutation or are of unsupported types of operation. Otherwise, return
456 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
457 slp_tree
*node
, unsigned int group_size
, int *outside_cost
,
458 int ncopies_for_cost
, unsigned int *max_nunits
,
459 vec
<int> *load_permutation
,
460 vec
<slp_tree
> *loads
,
461 unsigned int vectorization_factor
, bool *loads_permuted
,
462 stmt_vector_for_cost
*prologue_cost_vec
,
463 stmt_vector_for_cost
*body_cost_vec
)
466 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (*node
);
467 gimple stmt
= stmts
[0];
468 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
469 enum tree_code first_cond_code
= ERROR_MARK
;
471 bool stop_recursion
= false, need_same_oprnds
= false;
472 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
473 unsigned int ncopies
;
476 enum machine_mode optab_op2_mode
;
477 enum machine_mode vec_mode
;
478 struct data_reference
*first_dr
;
480 bool permutation
= false;
481 unsigned int load_place
;
482 gimple first_load
= NULL
, prev_first_load
= NULL
, old_first_load
= NULL
;
483 vec
<slp_oprnd_info
> oprnds_info
;
485 slp_oprnd_info oprnd_info
;
488 if (is_gimple_call (stmt
))
489 nops
= gimple_call_num_args (stmt
);
490 else if (is_gimple_assign (stmt
))
492 nops
= gimple_num_ops (stmt
) - 1;
493 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
499 oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
501 /* For every stmt in NODE find its def stmt/s. */
502 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
504 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
507 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
510 /* Fail to vectorize statements marked as unvectorizable. */
511 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
513 if (dump_enabled_p ())
515 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
516 "Build SLP failed: unvectorizable statement ");
517 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
520 vect_free_oprnd_info (oprnds_info
);
524 lhs
= gimple_get_lhs (stmt
);
525 if (lhs
== NULL_TREE
)
527 if (dump_enabled_p ())
529 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
530 "Build SLP failed: not GIMPLE_ASSIGN nor "
532 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
535 vect_free_oprnd_info (oprnds_info
);
539 if (is_gimple_assign (stmt
)
540 && gimple_assign_rhs_code (stmt
) == COND_EXPR
541 && (cond
= gimple_assign_rhs1 (stmt
))
542 && !COMPARISON_CLASS_P (cond
))
544 if (dump_enabled_p ())
546 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
547 "Build SLP failed: condition is not "
549 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
552 vect_free_oprnd_info (oprnds_info
);
556 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
557 vectype
= get_vectype_for_scalar_type (scalar_type
);
560 if (dump_enabled_p ())
562 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
563 "Build SLP failed: unsupported data-type ");
564 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
568 vect_free_oprnd_info (oprnds_info
);
572 /* In case of multiple types we need to detect the smallest type. */
573 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
575 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
577 vectorization_factor
= *max_nunits
;
580 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
582 if (is_gimple_call (stmt
))
584 rhs_code
= CALL_EXPR
;
585 if (gimple_call_internal_p (stmt
)
586 || gimple_call_tail_p (stmt
)
587 || gimple_call_noreturn_p (stmt
)
588 || !gimple_call_nothrow_p (stmt
)
589 || gimple_call_chain (stmt
))
591 if (dump_enabled_p ())
593 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
594 "Build SLP failed: unsupported call type ");
595 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
598 vect_free_oprnd_info (oprnds_info
);
603 rhs_code
= gimple_assign_rhs_code (stmt
);
605 /* Check the operation. */
608 first_stmt_code
= rhs_code
;
610 /* Shift arguments should be equal in all the packed stmts for a
611 vector shift with scalar shift operand. */
612 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
613 || rhs_code
== LROTATE_EXPR
614 || rhs_code
== RROTATE_EXPR
)
616 vec_mode
= TYPE_MODE (vectype
);
618 /* First see if we have a vector/vector shift. */
619 optab
= optab_for_tree_code (rhs_code
, vectype
,
623 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
625 /* No vector/vector shift, try for a vector/scalar shift. */
626 optab
= optab_for_tree_code (rhs_code
, vectype
,
631 if (dump_enabled_p ())
632 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
633 "Build SLP failed: no optab.");
634 vect_free_oprnd_info (oprnds_info
);
637 icode
= (int) optab_handler (optab
, vec_mode
);
638 if (icode
== CODE_FOR_nothing
)
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
643 "op not supported by target.");
644 vect_free_oprnd_info (oprnds_info
);
647 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
648 if (!VECTOR_MODE_P (optab_op2_mode
))
650 need_same_oprnds
= true;
651 first_op1
= gimple_assign_rhs2 (stmt
);
655 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
657 need_same_oprnds
= true;
658 first_op1
= gimple_assign_rhs2 (stmt
);
663 if (first_stmt_code
!= rhs_code
664 && (first_stmt_code
!= IMAGPART_EXPR
665 || rhs_code
!= REALPART_EXPR
)
666 && (first_stmt_code
!= REALPART_EXPR
667 || rhs_code
!= IMAGPART_EXPR
)
668 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
669 && (first_stmt_code
== ARRAY_REF
670 || first_stmt_code
== BIT_FIELD_REF
671 || first_stmt_code
== INDIRECT_REF
672 || first_stmt_code
== COMPONENT_REF
673 || first_stmt_code
== MEM_REF
)))
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
678 "Build SLP failed: different operation "
680 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
683 vect_free_oprnd_info (oprnds_info
);
688 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
693 "Build SLP failed: different shift "
695 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
698 vect_free_oprnd_info (oprnds_info
);
702 if (rhs_code
== CALL_EXPR
)
704 gimple first_stmt
= stmts
[0];
705 if (gimple_call_num_args (stmt
) != nops
706 || !operand_equal_p (gimple_call_fn (first_stmt
),
707 gimple_call_fn (stmt
), 0)
708 || gimple_call_fntype (first_stmt
)
709 != gimple_call_fntype (stmt
))
711 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
714 "Build SLP failed: different calls in ");
715 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
719 vect_free_oprnd_info (oprnds_info
);
725 /* Grouped store or load. */
726 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
728 if (REFERENCE_CLASS_P (lhs
))
731 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
732 stmt
, ncopies_for_cost
,
733 (i
== 0), &oprnds_info
,
737 vect_free_oprnd_info (oprnds_info
);
744 /* FORNOW: Check that there is no gap between the loads
745 and no gap between the groups when we need to load
746 multiple groups at once.
747 ??? We should enhance this to only disallow gaps
750 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
751 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
752 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
753 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
755 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
758 "Build SLP failed: grouped "
760 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
764 vect_free_oprnd_info (oprnds_info
);
768 /* Check that the size of interleaved loads group is not
769 greater than the SLP group size. */
771 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
772 && ((GROUP_SIZE (vinfo_for_stmt (stmt
))
773 - GROUP_GAP (vinfo_for_stmt (stmt
)))
774 > ncopies
* group_size
))
776 if (dump_enabled_p ())
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
779 "Build SLP failed: the number "
780 "of interleaved loads is greater than "
781 "the SLP group size ");
782 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
786 vect_free_oprnd_info (oprnds_info
);
790 old_first_load
= first_load
;
791 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
794 /* Check that there are no loads from different interleaving
795 chains in the same node. The only exception is complex
797 if (prev_first_load
!= first_load
798 && rhs_code
!= REALPART_EXPR
799 && rhs_code
!= IMAGPART_EXPR
)
801 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
805 "Build SLP failed: different "
806 "interleaving chains in one node ");
807 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
811 vect_free_oprnd_info (oprnds_info
);
816 prev_first_load
= first_load
;
818 /* In some cases a group of loads is just the same load
819 repeated N times. Only analyze its cost once. */
820 if (first_load
== stmt
&& old_first_load
!= first_load
)
822 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
823 if (vect_supportable_dr_alignment (first_dr
, false)
824 == dr_unaligned_unsupported
)
826 if (dump_enabled_p ())
828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
830 "Build SLP failed: unsupported "
832 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
836 vect_free_oprnd_info (oprnds_info
);
840 /* Analyze costs (for the first stmt in the group). */
841 vect_model_load_cost (vinfo_for_stmt (stmt
),
842 ncopies_for_cost
, false, *node
,
843 prologue_cost_vec
, body_cost_vec
);
846 /* Store the place of this load in the interleaving chain. In
847 case that permutation is needed we later decide if a specific
848 permutation is supported. */
849 load_place
= vect_get_place_in_interleaving_chain (stmt
,
854 load_permutation
->safe_push (load_place
);
856 /* We stop the tree when we reach a group of loads. */
857 stop_recursion
= true;
860 } /* Grouped access. */
863 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
865 /* Not grouped load. */
866 if (dump_enabled_p ())
868 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
869 "Build SLP failed: not grouped load ");
870 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
873 /* FORNOW: Not grouped loads are not supported. */
874 vect_free_oprnd_info (oprnds_info
);
878 /* Not memory operation. */
879 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
880 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
881 && rhs_code
!= COND_EXPR
882 && rhs_code
!= CALL_EXPR
)
884 if (dump_enabled_p ())
886 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
887 "Build SLP failed: operation");
888 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
889 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
892 vect_free_oprnd_info (oprnds_info
);
896 if (rhs_code
== COND_EXPR
)
898 tree cond_expr
= gimple_assign_rhs1 (stmt
);
901 first_cond_code
= TREE_CODE (cond_expr
);
902 else if (first_cond_code
!= TREE_CODE (cond_expr
))
904 if (dump_enabled_p ())
906 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
907 "Build SLP failed: different"
909 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
913 vect_free_oprnd_info (oprnds_info
);
918 /* Find the def-stmts. */
919 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
920 ncopies_for_cost
, (i
== 0),
921 &oprnds_info
, prologue_cost_vec
,
924 vect_free_oprnd_info (oprnds_info
);
930 /* Grouped loads were reached - stop the recursion. */
933 loads
->safe_push (*node
);
936 gimple first_stmt
= stmts
[0];
937 *loads_permuted
= true;
938 (void) record_stmt_cost (body_cost_vec
, group_size
, vec_perm
,
939 vinfo_for_stmt (first_stmt
), 0, vect_body
);
943 /* We don't check here complex numbers chains, so we set
944 LOADS_PERMUTED for further check in
945 vect_supported_load_permutation_p. */
946 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
947 *loads_permuted
= true;
950 vect_free_oprnd_info (oprnds_info
);
954 /* Create SLP_TREE nodes for the definition node/s. */
955 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
959 if (oprnd_info
->first_dt
!= vect_internal_def
)
962 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
964 || !vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
, group_size
,
965 outside_cost
, ncopies_for_cost
,
966 max_nunits
, load_permutation
, loads
,
967 vectorization_factor
, loads_permuted
,
968 prologue_cost_vec
, body_cost_vec
))
971 oprnd_info
->def_stmts
= vNULL
;
972 vect_free_slp_tree (child
);
973 vect_free_oprnd_info (oprnds_info
);
977 oprnd_info
->def_stmts
.create (0);
978 SLP_TREE_CHILDREN (*node
).quick_push (child
);
981 vect_free_oprnd_info (oprnds_info
);
985 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
988 vect_print_slp_tree (int dump_kind
, slp_tree node
)
997 dump_printf (dump_kind
, "node ");
998 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1000 dump_printf (dump_kind
, "\n\tstmt %d ", i
);
1001 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1003 dump_printf (dump_kind
, "\n");
1005 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1006 vect_print_slp_tree (dump_kind
, (slp_tree
) child
);
1010 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1011 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1012 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1013 stmts in NODE are to be marked. */
1016 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1025 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1026 if (j
< 0 || i
== j
)
1027 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1029 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1030 vect_mark_slp_stmts ((slp_tree
) child
, mark
, j
);
1034 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1037 vect_mark_slp_stmts_relevant (slp_tree node
)
1041 stmt_vec_info stmt_info
;
1047 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1049 stmt_info
= vinfo_for_stmt (stmt
);
1050 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1051 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1052 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1055 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1056 vect_mark_slp_stmts_relevant ((slp_tree
) child
);
1060 /* Check if the permutation required by the SLP INSTANCE is supported.
1061 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1064 vect_supported_slp_permutation_p (slp_instance instance
)
1066 slp_tree node
= SLP_INSTANCE_LOADS (instance
)[0];
1067 gimple stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1068 gimple first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
1069 vec
<slp_tree
> sorted_loads
= vNULL
;
1071 slp_tree
*tmp_loads
= NULL
;
1072 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
1075 /* FORNOW: The only supported loads permutation is loads from the same
1076 location in all the loads in the node, when the data-refs in
1077 nodes of LOADS constitute an interleaving chain.
1078 Sort the nodes according to the order of accesses in the chain. */
1079 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
1081 SLP_INSTANCE_LOAD_PERMUTATION (instance
).iterate (i
, &index
)
1082 && SLP_INSTANCE_LOADS (instance
).iterate (j
, &load
);
1083 i
+= group_size
, j
++)
1085 gimple scalar_stmt
= SLP_TREE_SCALAR_STMTS (load
)[0];
1086 /* Check that the loads are all in the same interleaving chain. */
1087 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt
)) != first_load
)
1089 if (dump_enabled_p ())
1091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1092 "Build SLP failed: unsupported data "
1094 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1102 tmp_loads
[index
] = load
;
1105 sorted_loads
.create (group_size
);
1106 for (i
= 0; i
< group_size
; i
++)
1107 sorted_loads
.safe_push (tmp_loads
[i
]);
1109 SLP_INSTANCE_LOADS (instance
).release ();
1110 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
1113 if (!vect_transform_slp_perm_load (stmt
, vNULL
, NULL
,
1114 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
1122 /* Rearrange the statements of NODE according to PERMUTATION. */
1125 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1126 vec
<int> permutation
)
1129 vec
<gimple
> tmp_stmts
;
1130 unsigned int index
, i
;
1136 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1137 vect_slp_rearrange_stmts ((slp_tree
) child
, group_size
, permutation
);
1139 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1140 tmp_stmts
.create (group_size
);
1142 for (i
= 0; i
< group_size
; i
++)
1143 tmp_stmts
.safe_push (NULL
);
1145 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1147 index
= permutation
[i
];
1148 tmp_stmts
[index
] = stmt
;
1151 SLP_TREE_SCALAR_STMTS (node
).release ();
1152 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1156 /* Check if the required load permutation is supported.
1157 LOAD_PERMUTATION contains a list of indices of the loads.
1158 In SLP this permutation is relative to the order of grouped stores that are
1159 the base of the SLP instance. */
1162 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
1163 vec
<int> load_permutation
)
1165 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
1166 bool supported
, bad_permutation
= false;
1168 slp_tree node
, other_complex_node
;
1169 gimple stmt
, first
= NULL
, other_node_first
, load
, next_load
, first_load
;
1170 unsigned complex_numbers
= 0;
1171 struct data_reference
*dr
;
1172 bb_vec_info bb_vinfo
;
1174 /* FORNOW: permutations are only supported in SLP. */
1178 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1181 FOR_EACH_VEC_ELT (load_permutation
, i
, next
)
1182 dump_printf (MSG_NOTE
, "%d ", next
);
1185 /* In case of reduction every load permutation is allowed, since the order
1186 of the reduction statements is not important (as opposed to the case of
1187 grouped stores). The only condition we need to check is that all the
1188 load nodes are of the same size and have the same permutation (and then
1189 rearrange all the nodes of the SLP instance according to this
1192 /* Check that all the load nodes are of the same size. */
1193 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1195 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1198 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1199 if (is_gimple_assign (stmt
)
1200 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
1201 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
1205 /* Complex operands can be swapped as following:
1206 real_c = real_b + real_a;
1207 imag_c = imag_a + imag_b;
1208 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1209 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1210 chains are mixed, they match the above pattern. */
1211 if (complex_numbers
)
1213 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1215 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, stmt
)
1221 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != first
)
1223 if (complex_numbers
!= 2)
1231 other_complex_node
= SLP_INSTANCE_LOADS (slp_instn
)[k
];
1233 SLP_TREE_SCALAR_STMTS (other_complex_node
)[0];
1235 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1236 != other_node_first
)
1244 /* We checked that this case ok, so there is no need to proceed with
1245 permutation tests. */
1246 if (complex_numbers
== 2
1247 && SLP_INSTANCE_LOADS (slp_instn
).length () == 2)
1249 SLP_INSTANCE_LOADS (slp_instn
).release ();
1250 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
).release ();
1254 node
= SLP_INSTANCE_TREE (slp_instn
);
1255 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1256 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1257 instance, not all the loads belong to the same node or interleaving
1258 group. Hence, we need to divide them into groups according to
1260 number_of_groups
= load_permutation
.length () / group_size
;
1262 /* Reduction (there are no data-refs in the root).
1263 In reduction chain the order of the loads is important. */
1264 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1265 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1267 int first_group_load_index
;
1269 /* Compare all the permutation sequences to the first one. */
1270 for (i
= 1; i
< number_of_groups
; i
++)
1273 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
1275 next
= load_permutation
[j
];
1276 first_group_load_index
= load_permutation
[k
];
1278 if (next
!= first_group_load_index
)
1280 bad_permutation
= true;
1287 if (bad_permutation
)
1291 if (!bad_permutation
)
1293 /* Check that the loads in the first sequence are different and there
1294 are no gaps between them. */
1295 load_index
= sbitmap_alloc (group_size
);
1296 bitmap_clear (load_index
);
1297 for (k
= 0; k
< group_size
; k
++)
1299 first_group_load_index
= load_permutation
[k
];
1300 if (bitmap_bit_p (load_index
, first_group_load_index
))
1302 bad_permutation
= true;
1306 bitmap_set_bit (load_index
, first_group_load_index
);
1309 if (!bad_permutation
)
1310 for (k
= 0; k
< group_size
; k
++)
1311 if (!bitmap_bit_p (load_index
, k
))
1313 bad_permutation
= true;
1317 sbitmap_free (load_index
);
1320 if (!bad_permutation
)
1322 /* This permutation is valid for reduction. Since the order of the
1323 statements in the nodes is not important unless they are memory
1324 accesses, we can rearrange the statements in all the nodes
1325 according to the order of the loads. */
1326 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1328 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
).release ();
1333 /* In basic block vectorization we allow any subchain of an interleaving
1335 FORNOW: not supported in loop SLP because of realignment compications. */
1336 bb_vinfo
= STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
));
1337 bad_permutation
= false;
1338 /* Check that for every node in the instance the loads form a subchain. */
1341 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1345 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1348 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
));
1350 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
)))
1352 bad_permutation
= true;
1356 if (j
!= 0 && next_load
!= load
)
1358 bad_permutation
= true;
1362 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1365 if (bad_permutation
)
1369 /* Check that the alignment of the first load in every subchain, i.e.,
1370 the first statement in every load node, is supported. */
1371 if (!bad_permutation
)
1373 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1375 first_load
= SLP_TREE_SCALAR_STMTS (node
)[0];
1377 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1379 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1380 if (vect_supportable_dr_alignment (dr
, false)
1381 == dr_unaligned_unsupported
)
1383 if (dump_enabled_p ())
1385 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1387 "unsupported unaligned load ");
1388 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1391 bad_permutation
= true;
1397 if (!bad_permutation
)
1399 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
).release ();
1405 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1406 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1407 well (unless it's reduction). */
1408 if (load_permutation
.length ()
1409 != (unsigned int) (group_size
* group_size
))
1413 load_index
= sbitmap_alloc (group_size
);
1414 bitmap_clear (load_index
);
1415 for (j
= 0; j
< group_size
; j
++)
1417 for (i
= j
* group_size
, k
= 0;
1418 load_permutation
.iterate (i
, &next
) && k
< group_size
;
1421 if (i
!= j
* group_size
&& next
!= prev
)
1430 if (bitmap_bit_p (load_index
, prev
))
1436 bitmap_set_bit (load_index
, prev
);
1439 for (j
= 0; j
< group_size
; j
++)
1440 if (!bitmap_bit_p (load_index
, j
))
1442 sbitmap_free (load_index
);
1446 sbitmap_free (load_index
);
1448 if (supported
&& i
== group_size
* group_size
1449 && vect_supported_slp_permutation_p (slp_instn
))
/* NOTE(review): this chunk is a line-mangled extraction.  Statements are
   split across physical lines and several original lines are missing here
   (the "static gimple" return type, braces, the declarations of i, j and
   load_node, and the trailing "return first_load;").  Code text below is
   kept byte-identical; restore the dropped lines from the upstream file.  */
1456 /* Find the first load in the loop that belongs to INSTANCE.
1457 When loads are in several SLP nodes, there can be a case in which the first
1458 load does not appear in the first SLP node to be transformed, causing
1459 incorrect order of statements. Since we generate all the loads together,
1460 they must be inserted before the first load of the SLP instance and not
1461 before the first load of the first node of the instance. */
1464 vect_find_first_load_in_slp_instance (slp_instance instance
)
1468 gimple first_load
= NULL
, load
;
/* Walk every scalar load of every load node, keeping the earliest one.  */
1470 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1471 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1472 first_load
= get_earlier_stmt (load
, first_load
);
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declaration of i and the final "return last_store;" are missing here;
   code text is byte-identical to the damaged source.  */
1478 /* Find the last store in SLP INSTANCE. */
1481 vect_find_last_store_in_slp_instance (slp_instance instance
)
1485 gimple last_store
= NULL
, store
;
/* Scan the scalar stmts of the instance root, keeping the latest one.  */
1487 node
= SLP_INSTANCE_TREE (instance
);
1488 for (i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &store
); i
++)
1489 last_store
= get_later_stmt (store
, last_store
);
/* NOTE(review): line-mangled extraction.  Control-flow braces, several
   declarations (stmt/next/node parameters and locals), early returns and
   parts of string continuations are missing; code text below is kept
   byte-identical.  Restore from the upstream file before compiling.  */
1495 /* Analyze an SLP instance starting from a group of grouped stores. Call
1496 vect_build_slp_tree to build a tree of packed stmts if possible.
1497 Return FALSE if it's impossible to SLP any stmt in the loop. */
1500 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1503 slp_instance new_instance
;
1505 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1506 unsigned int unrolling_factor
= 1, nunits
;
1507 tree vectype
, scalar_type
= NULL_TREE
;
1509 unsigned int vectorization_factor
= 0;
1510 int outside_cost
= 0, ncopies_for_cost
, i
;
1511 unsigned int max_nunits
= 0;
1512 vec
<int> load_permutation
;
1513 vec
<slp_tree
> loads
;
1514 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1515 bool loads_permuted
= false;
1516 vec
<gimple
> scalar_stmts
;
1517 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1518 stmt_info_for_cost
*si
;
/* Pick the vector type: from the data-ref for grouped accesses,
   otherwise from the stmt's vinfo (reduction cases require loop_vinfo).  */
1520 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1524 scalar_type
= TREE_TYPE (DR_REF (dr
));
1525 vectype
= get_vectype_for_scalar_type (scalar_type
);
1529 gcc_assert (loop_vinfo
);
1530 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1533 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1537 gcc_assert (loop_vinfo
);
1538 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1539 group_size
= LOOP_VINFO_REDUCTIONS (loop_vinfo
).length ();
1544 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1547 "Build SLP failed: unsupported data-type ");
1548 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1554 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1556 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1558 vectorization_factor
= nunits
;
1560 /* Calculate the unrolling factor. */
1561 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
/* Basic-block SLP cannot unroll -- bail out if unrolling is required.  */
1562 if (unrolling_factor
!= 1 && !loop_vinfo
)
1564 if (dump_enabled_p ())
1565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1566 "Build SLP failed: unrolling required in basic"
1572 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1573 scalar_stmts
.create (group_size
);
1575 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1577 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
/* Prefer the pattern stmt over the original when one exists.  */
1580 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1581 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1582 scalar_stmts
.safe_push (
1583 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1585 scalar_stmts
.safe_push (next
);
1586 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1591 /* Collect reduction statements. */
1592 vec
<gimple
> reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1593 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1594 scalar_stmts
.safe_push (next
);
1597 node
= vect_create_new_slp_node (scalar_stmts
);
1599 /* Calculate the number of vector stmts to create based on the unrolling
1600 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1601 GROUP_SIZE / NUNITS otherwise. */
1602 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1604 load_permutation
.create (group_size
* group_size
);
1605 loads
.create (group_size
);
1606 prologue_cost_vec
.create (10);
1607 body_cost_vec
.create (10);
1609 /* Build the tree for the SLP instance. */
1610 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1611 &outside_cost
, ncopies_for_cost
,
1612 &max_nunits
, &load_permutation
, &loads
,
1613 vectorization_factor
, &loads_permuted
,
1614 &prologue_cost_vec
, &body_cost_vec
))
1616 void *data
= (loop_vinfo
? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
)
1617 : BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
1619 /* Calculate the unrolling factor based on the smallest type. */
1620 if (max_nunits
> nunits
)
1621 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
/* Re-check the no-unrolling constraint for basic-block SLP and clean up
   everything allocated above on failure.  */
1624 if (unrolling_factor
!= 1 && !loop_vinfo
)
1626 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1628 "Build SLP failed: unrolling required in basic"
1630 vect_free_slp_tree (node
);
1631 body_cost_vec
.release ();
1632 prologue_cost_vec
.release ();
1633 load_permutation
.release ();
1638 /* Create a new SLP instance. */
1639 new_instance
= XNEW (struct _slp_instance
);
1640 SLP_INSTANCE_TREE (new_instance
) = node
;
1641 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1642 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1643 SLP_INSTANCE_BODY_COST_VEC (new_instance
) = body_cost_vec
;
1644 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1645 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1646 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
/* Permuted loads must pass the load-permutation check or the whole
   instance is rejected.  */
1650 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1653 if (dump_enabled_p ())
1655 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1656 "Build SLP failed: unsupported load "
1658 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1661 vect_free_slp_instance (new_instance
);
1662 prologue_cost_vec
.release ();
1666 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1667 = vect_find_first_load_in_slp_instance (new_instance
);
1670 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
).release ();
1672 /* Record the prologue costs, which were delayed until we were
1673 sure that SLP was successful. Unlike the body costs, we know
1674 the final values now regardless of the loop vectorization factor. */
1675 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1677 struct _stmt_vec_info
*stmt_info
1678 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1679 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1680 si
->misalign
, vect_prologue
);
1683 prologue_cost_vec
.release ();
/* Register the finished instance with the loop or the basic block.  */
1686 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
).safe_push (new_instance
);
1688 BB_VINFO_SLP_INSTANCES (bb_vinfo
).safe_push (new_instance
);
1690 if (dump_enabled_p ())
1691 vect_print_slp_tree (MSG_NOTE
, node
);
1697 body_cost_vec
.release ();
1698 prologue_cost_vec
.release ();
1701 /* Failed to SLP. */
1702 /* Free the allocated memory. */
1703 vect_free_slp_tree (node
);
1704 load_permutation
.release ();
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declarations of i/ok and several statements (e.g. the "ok = true"
   tracking and final return) are missing; code text is byte-identical.  */
1711 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1712 trees of packed scalar stmts if SLP is possible. */
1715 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1718 vec
<gimple
> grouped_stores
;
1719 vec
<gimple
> reductions
= vNULL
;
1720 vec
<gimple
> reduc_chains
= vNULL
;
1721 gimple first_element
;
1724 if (dump_enabled_p ())
1725 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===");
/* Gather the candidate seeds from either the loop or the basic block.  */
1729 grouped_stores
= LOOP_VINFO_GROUPED_STORES (loop_vinfo
);
1730 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1731 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1734 grouped_stores
= BB_VINFO_GROUPED_STORES (bb_vinfo
);
1736 /* Find SLP sequences starting from groups of grouped stores. */
1737 FOR_EACH_VEC_ELT (grouped_stores
, i
, first_element
)
1738 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1741 if (bb_vinfo
&& !ok
)
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1745 "Failed to SLP the basic block.");
1751 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
).length () > 0)
1753 /* Find SLP sequences starting from reduction chains. */
1754 FOR_EACH_VEC_ELT (reduc_chains
, i
, first_element
)
1755 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1760 /* Don't try to vectorize SLP reductions if reduction chain was
1765 /* Find SLP sequences starting from groups of reductions. */
1766 if (loop_vinfo
&& LOOP_VINFO_REDUCTIONS (loop_vinfo
).length () > 1
1767 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, reductions
[0]))
/* NOTE(review): line-mangled extraction -- return type, braces and the
   "decided_to_slp++" increment are missing; code text is byte-identical.  */
1774 /* For each possible SLP instance decide whether to SLP it and calculate overall
1775 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1776 least one instance. */
1779 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1781 unsigned int i
, unrolling_factor
= 1;
1782 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1783 slp_instance instance
;
1784 int decided_to_slp
= 0;
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ===");
1789 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1791 /* FORNOW: SLP if you can. */
/* Track the maximum unrolling factor over all instances.  */
1792 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1793 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1795 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1796 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1797 loop-based vectorization. Such stmts will be marked as HYBRID. */
1798 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1802 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1804 if (decided_to_slp
&& dump_enabled_p ())
1805 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
1806 "Decided to SLP %d instances. Unrolling factor %d",
1807 decided_to_slp
, unrolling_factor
);
1809 return (decided_to_slp
> 0);
/* NOTE(review): line-mangled extraction -- return type, braces and the
   declarations of i, use_stmt and child are missing; code text is
   byte-identical.  */
1813 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1814 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1817 vect_detect_hybrid_slp_stmts (slp_tree node
)
1820 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (node
);
1821 gimple stmt
= stmts
[0];
1822 imm_use_iterator imm_iter
;
1824 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1826 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1827 struct loop
*loop
= NULL
;
1828 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1829 basic_block bb
= NULL
;
1835 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1837 bb
= BB_VINFO_BB (bb_vinfo
);
/* A pure-SLP stmt whose SSA result is used, inside the loop/bb, by a
   relevant non-SLP stmt forces this node to become hybrid.  */
1839 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1840 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1841 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1842 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1843 if (gimple_bb (use_stmt
)
1844 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1845 || bb
== gimple_bb (use_stmt
))
1846 && (stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1847 && !STMT_SLP_TYPE (stmt_vinfo
)
1848 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1849 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1850 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1851 && STMT_VINFO_DEF_TYPE (stmt_vinfo
)
1852 == vect_reduction_def
))
1853 vect_mark_slp_stmts (node
, hybrid
, i
);
/* Recurse into the children of this SLP node.  */
1855 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1856 vect_detect_hybrid_slp_stmts ((slp_tree
) child
);
/* NOTE(review): line-mangled extraction -- return type, braces and the
   declaration of i are missing; code text is byte-identical.  */
1860 /* Find stmts that must be both vectorized and SLPed. */
1863 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1866 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1867 slp_instance instance
;
1869 if (dump_enabled_p ())
1870 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ===");
/* Run the per-tree hybrid detection on every SLP instance of the loop.  */
1872 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1873 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
/* NOTE(review): line-mangled extraction -- return type, braces and the
   trailing "return res;" are missing; code text is byte-identical.  */
1877 /* Create and initialize a new bb_vec_info struct for BB, as well as
1878 stmt_vec_info structs for all the stmts in it. */
1881 new_bb_vec_info (basic_block bb
)
1883 bb_vec_info res
= NULL
;
1884 gimple_stmt_iterator gsi
;
1886 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
))
;
/* NOTE(review): the ");" terminating the xcalloc call above was split by
   the extraction; only comment lines were added here.  */
1887 BB_VINFO_BB (res
) = bb
;
/* Attach a fresh stmt_vec_info (and uid 0) to every stmt in BB.  */
1889 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1891 gimple stmt
= gsi_stmt (gsi
);
1892 gimple_set_uid (stmt
, 0);
1893 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1896 BB_VINFO_GROUPED_STORES (res
).create (10);
1897 BB_VINFO_SLP_INSTANCES (res
).create (2);
1898 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declarations of bb and i, the NULL-vinfo guard and the final
   "free (bb_vinfo)" are missing; code text is byte-identical.  */
1905 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1906 stmts in the basic block. */
1909 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1911 vec
<slp_instance
> slp_instances
;
1912 slp_instance instance
;
1914 gimple_stmt_iterator si
;
1920 bb
= BB_VINFO_BB (bb_vinfo
);
1922 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1924 gimple stmt
= gsi_stmt (si
);
1925 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1928 /* Free stmt_vec_info. */
1929 free_stmt_vec_info (stmt
);
/* Release the data-refs, dependences, stores, SLP instances and the
   target cost data owned by BB_VINFO.  */
1932 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo
));
1933 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1934 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
1935 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1936 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1937 vect_free_slp_instance (instance
);
1938 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
1939 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declarations (i, stmt, child, dummy) and the "return false"/"return
   true" statements are missing; code text is byte-identical.  */
1945 /* Analyze statements contained in SLP tree node after recursively analyzing
1946 the subtree. Return TRUE if the operations are supported. */
1949 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
/* Post-order: children first, then each scalar stmt of this node.  */
1959 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1960 if (!vect_slp_analyze_node_operations (bb_vinfo
, (slp_tree
) child
))
1963 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1965 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1966 gcc_assert (stmt_info
);
1967 gcc_assert (PURE_SLP_STMT (stmt_info
));
1969 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declaration of i, the "i++" advance on success and the returns are
   missing; code text is byte-identical.  */
1977 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1978 operations are supported. */
1981 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1983 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1984 slp_instance instance
;
/* No i++ in the for header: on failure the instance is removed and the
   same index is re-tested.  */
1987 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
1989 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1990 SLP_INSTANCE_TREE (instance
)))
1992 vect_free_slp_instance (instance
);
1993 slp_instances
.ordered_remove (i
);
1999 if (!slp_instances
.length ())
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declarations of i/j/stmt, several "continue"/"else" lines and the
   final returns are missing; code text is byte-identical.  */
2005 /* Check if vectorization of the basic block is profitable. */
2008 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2010 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2011 slp_instance instance
;
2013 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2014 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2015 unsigned int stmt_cost
;
2017 gimple_stmt_iterator si
;
2018 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
2019 void *target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
2020 stmt_vec_info stmt_info
= NULL
;
2021 stmt_vector_for_cost body_cost_vec
;
2022 stmt_info_for_cost
*ci
;
2024 /* Calculate vector costs. */
/* Feed each instance's recorded body costs into the target cost model.  */
2025 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2027 body_cost_vec
= SLP_INSTANCE_BODY_COST_VEC (instance
);
2029 FOR_EACH_VEC_ELT (body_cost_vec
, j
, ci
)
2031 stmt_info
= ci
->stmt
? vinfo_for_stmt (ci
->stmt
) : NULL
;
2032 (void) add_stmt_cost (target_cost_data
, ci
->count
, ci
->kind
,
2033 stmt_info
, ci
->misalign
, vect_body
);
2037 /* Calculate scalar cost. */
/* Only pure-SLP vectorizable stmts contribute to the scalar cost.  */
2038 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2040 stmt
= gsi_stmt (si
);
2041 stmt_info
= vinfo_for_stmt (stmt
);
2043 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
)
2044 || !PURE_SLP_STMT (stmt_info
))
2047 if (STMT_VINFO_DATA_REF (stmt_info
))
2049 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2050 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2052 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2055 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2057 scalar_cost
+= stmt_cost
;
2060 /* Complete the target-specific cost calculation. */
2061 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2062 &vec_inside_cost
, &vec_epilogue_cost
);
2064 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2066 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2069 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2071 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2072 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2073 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d", scalar_cost
);
2076 /* Vectorization is profitable if its cost is less than the cost of scalar
2078 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
/* NOTE(review): line-mangled extraction -- return type, braces, the
   declarations of i/min_vf, "return NULL"/"return bb_vinfo" statements
   and some string-literal continuations are missing; code text is
   byte-identical.  This is a gauntlet of analysis phases; any failure
   destroys bb_vinfo and bails out.  */
2084 /* Check if the basic block can be vectorized. */
2087 vect_slp_analyze_bb_1 (basic_block bb
)
2089 bb_vec_info bb_vinfo
;
2090 vec
<slp_instance
> slp_instances
;
2091 slp_instance instance
;
2095 bb_vinfo
= new_bb_vec_info (bb
);
/* Phase 1: find and classify the data references.  */
2099 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2103 "not vectorized: unhandled data-ref in basic "
2106 destroy_bb_vec_info (bb_vinfo
);
2110 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2114 "not vectorized: not enough data-refs in "
2117 destroy_bb_vec_info (bb_vinfo
);
/* Phase 2: access patterns, pattern recognition, dependences.  */
2121 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2125 "not vectorized: unhandled data access in "
2128 destroy_bb_vec_info (bb_vinfo
);
2132 vect_pattern_recog (NULL
, bb_vinfo
);
2134 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo
))
2136 if (dump_enabled_p ())
2137 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2138 "not vectorized: unhandled data dependence "
2139 "in basic block.\n");
2141 destroy_bb_vec_info (bb_vinfo
);
2145 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2149 "not vectorized: bad data alignment in basic "
2152 destroy_bb_vec_info (bb_vinfo
);
2156 /* Check the SLP opportunities in the basic block, analyze and build SLP
2158 if (!vect_analyze_slp (NULL
, bb_vinfo
))
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2162 "not vectorized: failed to find SLP opportunities "
2163 "in basic block.\n");
2165 destroy_bb_vec_info (bb_vinfo
);
2169 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2171 /* Mark all the statements that we want to vectorize as pure SLP and
2173 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2175 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2176 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
/* Phase 3: alignment verification and operation support.  */
2179 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2183 "not vectorized: unsupported alignment in basic "
2185 destroy_bb_vec_info (bb_vinfo
);
2189 if (!vect_slp_analyze_operations (bb_vinfo
))
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2193 "not vectorized: bad operation in basic block.\n");
2195 destroy_bb_vec_info (bb_vinfo
);
2199 /* Cost model: check if the vectorization is worthwhile. */
2200 if (flag_vect_cost_model
2201 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2205 "not vectorized: vectorization is not "
2208 destroy_bb_vec_info (bb_vinfo
);
2212 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_NOTE
, vect_location
,
2214 "Basic block will be vectorized using SLP\n");
/* NOTE(review): line-mangled extraction -- the function comment, return
   type, braces, the insns counter declaration/increment, the retry loop
   header ("while (1)") and the returns are missing; code text is
   byte-identical.  Visible logic: count real insns, reject oversized
   blocks, then repeatedly try vect_slp_analyze_bb_1 with successively
   smaller vector sizes.  */
2221 vect_slp_analyze_bb (basic_block bb
)
2223 bb_vec_info bb_vinfo
;
2225 gimple_stmt_iterator gsi
;
2226 unsigned int vector_sizes
;
2228 if (dump_enabled_p ())
2229 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
/* Count the statements that are neither debug, nop nor label.  */
2231 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2233 gimple stmt
= gsi_stmt (gsi
);
2234 if (!is_gimple_debug (stmt
)
2235 && !gimple_nop_p (stmt
)
2236 && gimple_code (stmt
) != GIMPLE_LABEL
)
2240 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2244 "not vectorized: too many instructions in "
2250 /* Autodetect first vector size we try. */
2251 current_vector_size
= 0;
2252 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2256 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2260 destroy_bb_vec_info (bb_vinfo
);
/* Drop the size just tried and stop when no candidate sizes remain.  */
2262 vector_sizes
&= ~current_vector_size
;
2263 if (vector_sizes
== 0
2264 || current_vector_size
== 0)
2267 /* Try the next biggest vector size. */
2268 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_NOTE
, vect_location
,
2271 "***** Re-trying analysis with "
2272 "vector size %d\n", current_vector_size
);
/* NOTE(review): line-mangled extraction -- return type, braces and the
   trailing "vect_body);" argument of the add_stmt_cost call are missing;
   code text is byte-identical.  */
2277 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2278 the number of created vector stmts depends on the unrolling factor).
2279 However, the actual number of vector stmts for every SLP node depends on
2280 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2281 should be updated. In this function we assume that the inside costs
2282 calculated in vect_model_xxx_cost are linear in ncopies. */
2285 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2287 unsigned int i
, j
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2288 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2289 slp_instance instance
;
2290 stmt_vector_for_cost body_cost_vec
;
2291 stmt_info_for_cost
*si
;
2292 void *data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_NOTE
, vect_location
,
2296 "=== vect_update_slp_costs_according_to_vf ===");
2298 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2300 /* We assume that costs are linear in ncopies. */
2301 int ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2303 /* Record the instance's instructions in the target cost model.
2304 This was delayed until here because the count of instructions
2305 isn't known beforehand. */
2306 body_cost_vec
= SLP_INSTANCE_BODY_COST_VEC (instance
);
/* Each recorded cost entry is scaled by ncopies before being added.  */
2308 FOR_EACH_VEC_ELT (body_cost_vec
, j
, si
)
2309 (void) add_stmt_cost (data
, si
->count
* ncopies
, si
->kind
,
2310 vinfo_for_stmt (si
->stmt
), si
->misalign
,
/* NOTE(review): this is the most heavily damaged block of the extraction.
   Return type, braces, many declarations (voprnds, elts, vector_type,
   nunits, def_stmt, loop, vop, vec_cst, new_temp, init_stmt, k), the
   switch on CODE with its case labels/breaks, and several else-branches
   are missing; code text below is byte-identical.  Restore from the
   upstream file before compiling.  */
2316 /* For constant and loop invariant defs of SLP_NODE this function returns
2317 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2318 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2319 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2320 REDUC_INDEX is the index of the reduction operand in the statements, unless
2324 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2325 vec
<tree
> *vec_oprnds
,
2326 unsigned int op_num
, unsigned int number_of_vectors
,
2329 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2330 gimple stmt
= stmts
[0];
2331 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2335 unsigned j
, number_of_places_left_in_vector
;
2338 int group_size
= stmts
.length ();
2339 unsigned int vec_num
, i
;
2340 unsigned number_of_copies
= 1;
2342 voprnds
.create (number_of_vectors
);
2343 bool constant_p
, is_store
;
2344 tree neutral_op
= NULL
;
2345 enum tree_code code
= gimple_expr_code (stmt
);
2348 gimple_seq ctor_seq
= NULL
;
/* For a reduction, pick the reduction operand and compute the neutral
   element used for padding extra copies.  */
2350 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2351 && reduc_index
!= -1)
2353 op_num
= reduc_index
- 1;
2354 op
= gimple_op (stmt
, reduc_index
);
2355 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2356 we need either neutral operands or the original operands. See
2357 get_initial_def_for_reduction() for details. */
2360 case WIDEN_SUM_EXPR
:
/* Additive reductions: neutral element is 0 (real or integer).  */
2366 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2367 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2369 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
/* Multiplicative reductions: neutral element is 1.  */
2374 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2375 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2377 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
/* Bitwise-and style: neutral element is all-ones (-1).  */
2382 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
/* Default: take the initial value from the PHI's preheader edge.  */
2387 def_stmt
= SSA_NAME_DEF_STMT (op
);
2388 loop
= (gimple_bb (stmt
))->loop_father
;
2389 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2390 loop_preheader_edge (loop
));
2398 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2401 op
= gimple_assign_rhs1 (stmt
);
2408 if (CONSTANT_CLASS_P (op
))
2413 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2414 gcc_assert (vector_type
);
2415 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2417 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2418 created vectors. It is greater than 1 if unrolling is performed.
2420 For example, we have two scalar operands, s1 and s2 (e.g., group of
2421 strided accesses of size two), while NUNITS is four (i.e., four scalars
2422 of this type can be packed in a vector). The output vector will contain
2423 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2426 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2427 containing the operands.
2429 For example, NUNITS is four as before, and the group size is 8
2430 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2431 {s5, s6, s7, s8}. */
2433 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2435 number_of_places_left_in_vector
= nunits
;
2436 elts
= XALLOCAVEC (tree
, nunits
);
/* Walk the group backwards NUMBER_OF_COPIES times, filling ELTS from
   the high index down and emitting a vector each time it fills up.  */
2437 for (j
= 0; j
< number_of_copies
; j
++)
2439 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
2442 op
= gimple_assign_rhs1 (stmt
);
/* Operand selection depends on the stmt shape (cond, call, shift,
   generic op) -- the surrounding switch/if structure was dropped by
   the extraction.  */
2448 if (op_num
== 0 || op_num
== 1)
2450 tree cond
= gimple_assign_rhs1 (stmt
);
2451 op
= TREE_OPERAND (cond
, op_num
);
2456 op
= gimple_assign_rhs2 (stmt
);
2458 op
= gimple_assign_rhs3 (stmt
);
2463 op
= gimple_call_arg (stmt
, op_num
);
2470 op
= gimple_op (stmt
, op_num
+ 1);
2471 /* Unlike the other binary operators, shifts/rotates have
2472 the shift count being int, instead of the same type as
2473 the lhs, so make sure the scalar is the right type if
2474 we are dealing with vectors of
2475 long long/long/short/char. */
2476 if (op_num
== 1 && constant_p
)
2477 op
= fold_convert (TREE_TYPE (vector_type
), op
);
2481 op
= gimple_op (stmt
, op_num
+ 1);
2486 if (reduc_index
!= -1)
2488 loop
= (gimple_bb (stmt
))->loop_father
;
2489 def_stmt
= SSA_NAME_DEF_STMT (op
);
2493 /* Get the def before the loop. In reduction chain we have only
2494 one initial value. */
2495 if ((j
!= (number_of_copies
- 1)
2496 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2501 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2502 loop_preheader_edge (loop
));
2505 /* Create 'vect_ = {op0,op1,...,opn}'. */
2506 number_of_places_left_in_vector
--;
2507 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
2511 op
= fold_unary (VIEW_CONVERT_EXPR
,
2512 TREE_TYPE (vector_type
), op
);
2513 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
/* Non-constant mismatched types get an explicit VIEW_CONVERT_EXPR
   assignment queued on CTOR_SEQ.  */
2518 = make_ssa_name (TREE_TYPE (vector_type
), NULL
);
2520 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
2523 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
,
2524 new_temp
, op
, NULL_TREE
);
2525 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
2529 elts
[number_of_places_left_in_vector
] = op
;
2531 if (number_of_places_left_in_vector
== 0)
2533 number_of_places_left_in_vector
= nunits
;
/* All-constant elements become a VECTOR_CST, otherwise a
   CONSTRUCTOR is built element by element.  */
2536 vec_cst
= build_vector (vector_type
, elts
);
2539 vec
<constructor_elt
, va_gc
> *v
;
2541 vec_alloc (v
, nunits
);
2542 for (k
= 0; k
< nunits
; ++k
)
2543 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
2544 vec_cst
= build_constructor (vector_type
, v
);
2546 voprnds
.quick_push (vect_init_vector (stmt
, vec_cst
,
2547 vector_type
, NULL
));
2548 if (ctor_seq
!= NULL
)
2550 gimple init_stmt
= SSA_NAME_DEF_STMT (voprnds
.last ());
2551 gimple_stmt_iterator gsi
= gsi_for_stmt (init_stmt
);
2552 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
2560 /* Since the vectors are created in the reverse order, we should invert
2562 vec_num
= voprnds
.length ();
2563 for (j
= vec_num
; j
!= 0; j
--)
2565 vop
= voprnds
[j
- 1];
2566 vec_oprnds
->quick_push (vop
);
2571 /* In case that VF is greater than the unrolling factor needed for the SLP
2572 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2573 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2574 to replicate the vectors. */
2575 while (number_of_vectors
> vec_oprnds
->length ())
2577 tree neutral_vec
= NULL
;
/* Pad with a splat of the neutral op when one exists; otherwise
   recycle the vectors already produced.  */
2582 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2584 vec_oprnds
->quick_push (neutral_vec
);
2588 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
2589 vec_oprnds
->quick_push (vop
);
2595 /* Get vectorized definitions from SLP_NODE that contains corresponding
2596 vectorized def-stmts. */
2599 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
2602 gimple vec_def_stmt
;
2605 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
2607 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2609 gcc_assert (vec_def_stmt
);
2610 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2611 vec_oprnds
->quick_push (vec_oprnd
);
2616 /* Get vectorized definitions for SLP_NODE.
2617 If the scalar definitions are loop invariants or constants, collect them and
2618 call vect_get_constant_vectors() to create vector stmts.
2619 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2620 must be stored in the corresponding child of SLP_NODE, and we call
2621 vect_get_slp_vect_defs () to retrieve them. */
2624 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
2625 vec
<vec
<tree
> > *vec_oprnds
, int reduc_index
)
2628 int number_of_vects
= 0, i
;
2629 unsigned int child_index
= 0;
2630 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2631 slp_tree child
= NULL
;
2634 bool vectorized_defs
;
2636 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
2637 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
2639 /* For each operand we check if it has vectorized definitions in a child
2640 node or we need to create them (for invariants and constants). We
2641 check if the LHS of the first stmt of the next child matches OPRND.
2642 If it does, we found the correct child. Otherwise, we call
2643 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2644 to check this child node for the next operand. */
2645 vectorized_defs
= false;
2646 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
2648 child
= (slp_tree
) SLP_TREE_CHILDREN (slp_node
)[child_index
];
2650 /* We have to check both pattern and original def, if available. */
2651 gimple first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
2652 gimple related
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2654 if (operand_equal_p (oprnd
, gimple_get_lhs (first_def
), 0)
2656 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
2658 /* The number of vector defs is determined by the number of
2659 vector statements in the node from which we get those
2661 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2662 vectorized_defs
= true;
2667 if (!vectorized_defs
)
2671 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2672 /* Number of vector stmts was calculated according to LHS in
2673 vect_schedule_slp_instance (), fix it by replacing LHS with
2674 RHS, if necessary. See vect_get_smallest_scalar_type () for
2676 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2678 if (rhs_size_unit
!= lhs_size_unit
)
2680 number_of_vects
*= rhs_size_unit
;
2681 number_of_vects
/= lhs_size_unit
;
2686 /* Allocate memory for vectorized defs. */
2688 vec_defs
.create (number_of_vects
);
2690 /* For reduction defs we call vect_get_constant_vectors (), since we are
2691 looking for initial loop invariant values. */
2692 if (vectorized_defs
&& reduc_index
== -1)
2693 /* The defs are already vectorized. */
2694 vect_get_slp_vect_defs (child
, &vec_defs
);
2696 /* Build vectors from scalar defs. */
2697 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2698 number_of_vects
, reduc_index
);
2700 vec_oprnds
->quick_push (vec_defs
);
2702 /* For reductions, we only need initial values. */
2703 if (reduc_index
!= -1)
2709 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2710 building a vector of type MASK_TYPE from it) and two input vectors placed in
2711 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2712 shifting by STRIDE elements of DR_CHAIN for every copy.
2713 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2715 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2716 the created stmts must be inserted. */
2719 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2720 tree mask
, int first_vec_indx
, int second_vec_indx
,
2721 gimple_stmt_iterator
*gsi
, slp_tree node
,
2722 tree vectype
, vec
<tree
> dr_chain
,
2723 int ncopies
, int vect_stmts_counter
)
2726 gimple perm_stmt
= NULL
;
2727 stmt_vec_info next_stmt_info
;
2729 tree first_vec
, second_vec
, data_ref
;
2731 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2733 /* Initialize the vect stmts of NODE to properly insert the generated
2735 for (i
= SLP_TREE_VEC_STMTS (node
).length ();
2736 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2737 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
2739 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2740 for (i
= 0; i
< ncopies
; i
++)
2742 first_vec
= dr_chain
[first_vec_indx
];
2743 second_vec
= dr_chain
[second_vec_indx
];
2745 /* Generate the permute statement. */
2746 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, perm_dest
,
2747 first_vec
, second_vec
, mask
);
2748 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2749 gimple_set_lhs (perm_stmt
, data_ref
);
2750 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2752 /* Store the vector statement in NODE. */
2753 SLP_TREE_VEC_STMTS (node
)[stride
* i
+ vect_stmts_counter
] = perm_stmt
;
2755 first_vec_indx
+= stride
;
2756 second_vec_indx
+= stride
;
2759 /* Mark the scalar stmt as vectorized. */
2760 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2761 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2765 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2766 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2767 representation. Check that the mask is valid and return FALSE if not.
2768 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2769 the next vector, i.e., the current first vector is not needed. */
2772 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2773 int mask_nunits
, bool only_one_vec
, int index
,
2774 unsigned char *mask
, int *current_mask_element
,
2775 bool *need_next_vector
, int *number_of_mask_fixes
,
2776 bool *mask_fixed
, bool *needs_first_vector
)
2780 /* Convert to target specific representation. */
2781 *current_mask_element
= first_mask_element
+ m
;
2782 /* Adjust the value in case it's a mask for second and third vectors. */
2783 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2785 if (*current_mask_element
< mask_nunits
)
2786 *needs_first_vector
= true;
2788 /* We have only one input vector to permute but the mask accesses values in
2789 the next vector as well. */
2790 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2792 if (dump_enabled_p ())
2794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2795 "permutation requires at least two vectors ");
2796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2802 /* The mask requires the next vector. */
2803 if (*current_mask_element
>= mask_nunits
* 2)
2805 if (*needs_first_vector
|| *mask_fixed
)
2807 /* We either need the first vector too or have already moved to the
2808 next vector. In both cases, this permutation needs three
2810 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2813 "permutation requires at "
2814 "least three vectors ");
2815 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2821 /* We move to the next vector, dropping the first one and working with
2822 the second and the third - we need to adjust the values of the mask
2824 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2826 for (i
= 0; i
< index
; i
++)
2827 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2829 (*number_of_mask_fixes
)++;
2833 *need_next_vector
= *mask_fixed
;
2835 /* This was the last element of this mask. Start a new one. */
2836 if (index
== mask_nunits
- 1)
2838 *number_of_mask_fixes
= 1;
2839 *mask_fixed
= false;
2840 *needs_first_vector
= false;
2847 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2848 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2849 permute statements for SLP_NODE_INSTANCE. */
2851 vect_transform_slp_perm_load (gimple stmt
, vec
<tree
> dr_chain
,
2852 gimple_stmt_iterator
*gsi
, int vf
,
2853 slp_instance slp_node_instance
, bool analyze_only
)
2855 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2856 tree mask_element_type
= NULL_TREE
, mask_type
;
2857 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2859 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2860 gimple next_scalar_stmt
;
2861 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2862 int first_mask_element
;
2863 int index
, unroll_factor
, current_mask_element
, ncopies
;
2864 unsigned char *mask
;
2865 bool only_one_vec
= false, need_next_vector
= false;
2866 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2867 int number_of_mask_fixes
= 1;
2868 bool mask_fixed
= false;
2869 bool needs_first_vector
= false;
2870 enum machine_mode mode
;
2872 mode
= TYPE_MODE (vectype
);
2874 if (!can_vec_perm_p (mode
, false, NULL
))
2876 if (dump_enabled_p ())
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2879 "no vect permute for ");
2880 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2885 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2886 same size as the vector element being permuted. */
2887 mask_element_type
= lang_hooks
.types
.type_for_mode
2888 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
2889 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2890 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2891 mask
= XALLOCAVEC (unsigned char, nunits
);
2892 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2894 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2895 unrolling factor. */
2896 orig_vec_stmts_num
= group_size
*
2897 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2898 if (orig_vec_stmts_num
== 1)
2899 only_one_vec
= true;
2901 /* Number of copies is determined by the final vectorization factor
2902 relatively to SLP_NODE_INSTANCE unrolling factor. */
2903 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2905 /* Generate permutation masks for every NODE. Number of masks for each NODE
2906 is equal to GROUP_SIZE.
2907 E.g., we have a group of three nodes with three loads from the same
2908 location in each node, and the vector size is 4. I.e., we have a
2909 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2910 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2911 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2914 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2915 The last mask is illegal since we assume two operands for permute
2916 operation, and the mask element values can't be outside that range.
2917 Hence, the last mask must be converted into {2,5,5,5}.
2918 For the first two permutations we need the first and the second input
2919 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2920 we need the second and the third vectors: {b1,c1,a2,b2} and
2923 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance
), i
, node
)
2927 vect_stmts_counter
= 0;
2929 first_vec_index
= vec_index
++;
2931 second_vec_index
= first_vec_index
;
2933 second_vec_index
= vec_index
++;
2935 for (j
= 0; j
< unroll_factor
; j
++)
2937 for (k
= 0; k
< group_size
; k
++)
2939 first_mask_element
= i
+ j
* group_size
;
2940 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2941 nunits
, only_one_vec
, index
,
2942 mask
, ¤t_mask_element
,
2944 &number_of_mask_fixes
, &mask_fixed
,
2945 &needs_first_vector
))
2947 mask
[index
++] = current_mask_element
;
2949 if (index
== nunits
)
2951 tree mask_vec
, *mask_elts
;
2954 if (!can_vec_perm_p (mode
, false, mask
))
2956 if (dump_enabled_p ())
2958 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
2960 "unsupported vect permute { ");
2961 for (i
= 0; i
< nunits
; ++i
)
2962 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ",
2964 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
2969 mask_elts
= XALLOCAVEC (tree
, nunits
);
2970 for (l
= 0; l
< nunits
; ++l
)
2971 mask_elts
[l
] = build_int_cst (mask_element_type
, mask
[l
]);
2972 mask_vec
= build_vector (mask_type
, mask_elts
);
2977 if (need_next_vector
)
2979 first_vec_index
= second_vec_index
;
2980 second_vec_index
= vec_index
;
2984 = SLP_TREE_SCALAR_STMTS (node
)[scalar_index
++];
2986 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2987 mask_vec
, first_vec_index
, second_vec_index
,
2988 gsi
, node
, vectype
, dr_chain
,
2989 ncopies
, vect_stmts_counter
++);
3001 /* Vectorize SLP instance tree in postorder. */
3004 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3005 unsigned int vectorization_factor
)
3008 bool grouped_store
, is_store
;
3009 gimple_stmt_iterator si
;
3010 stmt_vec_info stmt_info
;
3011 unsigned int vec_stmts_size
, nunits
, group_size
;
3014 slp_tree loads_node
;
3020 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3021 vect_schedule_slp_instance ((slp_tree
) child
, instance
,
3022 vectorization_factor
);
3024 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3025 stmt_info
= vinfo_for_stmt (stmt
);
3027 /* VECTYPE is the type of the destination. */
3028 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3029 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3030 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3032 /* For each SLP instance calculate number of vector stmts to be created
3033 for the scalar stmts in each node of the SLP tree. Number of vector
3034 elements in one vector iteration is the number of scalar elements in
3035 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3037 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
3039 /* In case of load permutation we have to allocate vectorized statements for
3040 all the nodes that participate in that permutation. */
3041 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
).exists ())
3043 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance
), i
, loads_node
)
3045 if (!SLP_TREE_VEC_STMTS (loads_node
).exists ())
3047 SLP_TREE_VEC_STMTS (loads_node
).create (vec_stmts_size
);
3048 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
3053 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3055 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3056 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3059 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_NOTE
,vect_location
,
3062 "------>vectorizing SLP node starting from: ");
3063 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3066 /* Loads should be inserted before the first load. */
3067 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
3068 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3069 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
3070 && SLP_INSTANCE_LOAD_PERMUTATION (instance
).exists ())
3071 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
3072 else if (is_pattern_stmt_p (stmt_info
))
3073 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
3075 si
= gsi_for_stmt (stmt
);
3077 /* Stores should be inserted just before the last store. */
3078 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3079 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
3081 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
3082 if (is_pattern_stmt_p (vinfo_for_stmt (last_store
)))
3083 last_store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store
));
3084 si
= gsi_for_stmt (last_store
);
3087 /* Mark the first element of the reduction chain as reduction to properly
3088 transform the node. In the analysis phase only the last element of the
3089 chain is marked as reduction. */
3090 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3091 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3093 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3094 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3097 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3101 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3102 For loop vectorization this is done in vectorizable_call, but for SLP
3103 it needs to be deferred until end of vect_schedule_slp, because multiple
3104 SLP instances may refer to the same scalar stmt. */
3107 vect_remove_slp_scalar_calls (slp_tree node
)
3109 gimple stmt
, new_stmt
;
3110 gimple_stmt_iterator gsi
;
3114 stmt_vec_info stmt_info
;
3119 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3120 vect_remove_slp_scalar_calls ((slp_tree
) child
);
3122 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3124 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3126 stmt_info
= vinfo_for_stmt (stmt
);
3127 if (stmt_info
== NULL
3128 || is_pattern_stmt_p (stmt_info
)
3129 || !PURE_SLP_STMT (stmt_info
))
3131 lhs
= gimple_call_lhs (stmt
);
3132 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3133 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3134 set_vinfo_for_stmt (stmt
, NULL
);
3135 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3136 gsi
= gsi_for_stmt (stmt
);
3137 gsi_replace (&gsi
, new_stmt
, false);
3138 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3142 /* Generate vector code for all SLP instances in the loop/basic block. */
3145 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3147 vec
<slp_instance
> slp_instances
;
3148 slp_instance instance
;
3149 slp_tree loads_node
;
3150 unsigned int i
, j
, vf
;
3151 bool is_store
= false;
3155 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3156 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3160 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
3164 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3166 /* Schedule the tree of INSTANCE. */
3167 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3170 /* Clear STMT_VINFO_VEC_STMT of all loads. With shared loads
3171 between SLP instances we fail to properly initialize the
3172 vectorized SLP stmts and confuse different load permutations. */
3173 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance
), j
, loads_node
)
3175 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node
)[0])) = NULL
;
3177 if (dump_enabled_p ())
3178 dump_printf_loc (MSG_NOTE
, vect_location
,
3179 "vectorizing stmts using SLP.");
3182 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3184 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3187 gimple_stmt_iterator gsi
;
3189 /* Remove scalar call stmts. Do not do this for basic-block
3190 vectorization as not all uses may be vectorized.
3191 ??? Why should this be necessary? DCE should be able to
3192 remove the stmts itself.
3193 ??? For BB vectorization we can as well remove scalar
3194 stmts starting from the SLP tree root if they have no
3197 vect_remove_slp_scalar_calls (root
);
3199 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3200 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3202 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3205 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3206 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3207 /* Free the attached stmt_vec_info and remove the stmt. */
3208 gsi
= gsi_for_stmt (store
);
3209 unlink_stmt_vdef (store
);
3210 gsi_remove (&gsi
, true);
3211 release_defs (store
);
3212 free_stmt_vec_info (store
);
3220 /* Vectorize the basic block. */
3223 vect_slp_transform_bb (basic_block bb
)
3225 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3226 gimple_stmt_iterator si
;
3228 gcc_assert (bb_vinfo
);
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB\n");
3233 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3235 gimple stmt
= gsi_stmt (si
);
3236 stmt_vec_info stmt_info
;
3238 if (dump_enabled_p ())
3240 dump_printf_loc (MSG_NOTE
, vect_location
,
3241 "------>SLPing statement: ");
3242 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3245 stmt_info
= vinfo_for_stmt (stmt
);
3246 gcc_assert (stmt_info
);
3248 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3249 if (STMT_SLP_TYPE (stmt_info
))
3251 vect_schedule_slp (NULL
, bb_vinfo
);
3256 if (dump_enabled_p ())
3257 dump_printf (MSG_OPTIMIZED_LOCATIONS
, "BASIC BLOCK VECTORIZED\n");
3259 destroy_bb_vec_info (bb_vinfo
);