1 /* SLP - Basic Block Vectorization
2    Copyright (C) 2007-2015 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "hard-reg-set.h"
43 #include "basic-block.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-ssa-alias.h"
46 #include "internal-fn.h"
47 #include "gimple-expr.h"
50 #include "gimple-iterator.h"
51 #include "gimple-ssa.h"
52 #include "tree-phinodes.h"
53 #include "ssa-iterators.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-pass.h"
61 #include "statistics.h"
63 #include "fixed-value.h"
64 #include "insn-config.h"
73 #include "recog.h" /* FIXME: for insn_data */
74 #include "insn-codes.h"
76 #include "tree-vectorizer.h"
77 #include "langhooks.h"
78 #include "gimple-walk.h"
80 /* Extract the location of the basic block in the source code.
81    Return the basic block location if it succeeds and UNKNOWN_LOCATION if not.  */
84 find_bb_location (basic_block bb)
87   gimple_stmt_iterator si;
90     return UNKNOWN_LOCATION;
92   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
95       if (gimple_location (stmt) != UNKNOWN_LOCATION)
96         return gimple_location (stmt);
99   return UNKNOWN_LOCATION;
103 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
106 vect_free_slp_tree (slp_tree node)
114   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
115     vect_free_slp_tree (child);
117   SLP_TREE_CHILDREN (node).release ();
118   SLP_TREE_SCALAR_STMTS (node).release ();
119   SLP_TREE_VEC_STMTS (node).release ();
120   SLP_TREE_LOAD_PERMUTATION (node).release ();
126 /* Free the memory allocated for the SLP instance.  */
129 vect_free_slp_instance (slp_instance instance)
131   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
132   SLP_INSTANCE_LOADS (instance).release ();
137 /* Create an SLP node for SCALAR_STMTS.  */
140 vect_create_new_slp_node (vec<gimple> scalar_stmts)
143   gimple stmt = scalar_stmts[0];
146   if (is_gimple_call (stmt))
147     nops = gimple_call_num_args (stmt);
148   else if (is_gimple_assign (stmt))
150       nops = gimple_num_ops (stmt) - 1;
151       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
157   node = XNEW (struct _slp_tree);
158   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
159   SLP_TREE_VEC_STMTS (node).create (0);
160   SLP_TREE_CHILDREN (node).create (nops);
161   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
162   SLP_TREE_TWO_OPERATORS (node) = false;
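/* Added commentary (not in the original sources): the COND_EXPR case is
   special-cased above because a COND_EXPR right-hand side carries an embedded
   comparison, so it has more SLP operands than gimple_num_ops () - 1 suggests;
   this matches the way vect_get_and_check_slp_defs below walks the two
   comparison operands before the two arms (assumption based on that code).  */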
168 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
169    operand.  */
170 static vec<slp_oprnd_info>
171 vect_create_oprnd_info (int nops, int group_size)
174   slp_oprnd_info oprnd_info;
175   vec<slp_oprnd_info> oprnds_info;
177   oprnds_info.create (nops);
178   for (i = 0; i < nops; i++)
180       oprnd_info = XNEW (struct _slp_oprnd_info);
181       oprnd_info->def_stmts.create (group_size);
182       oprnd_info->first_dt = vect_uninitialized_def;
183       oprnd_info->first_op_type = NULL_TREE;
184       oprnd_info->first_pattern = false;
185       oprnd_info->second_pattern = false;
186       oprnds_info.quick_push (oprnd_info);
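/* Illustrative note (added commentary): for an SLP group of four statements of
   the form  a[i] = b[i] + c[i],  NOPS is 2 and GROUP_SIZE is 4, so
   oprnds_info[0]->def_stmts collects the four statements defining the first
   (b) operand and oprnds_info[1]->def_stmts the four defining the second (c)
   operand.  */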
193 /* Free operands info.  */
196 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
199   slp_oprnd_info oprnd_info;
201   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
203       oprnd_info->def_stmts.release ();
204       XDELETE (oprnd_info);
207   oprnds_info.release ();
211 /* Find the place of the data-ref in STMT in the interleaving chain that starts
212    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
215 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
217   gimple next_stmt = first_stmt;
220   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
225       if (next_stmt == stmt)
227       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
229         result += GROUP_GAP (vinfo_for_stmt (next_stmt));
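/* Illustrative note (added commentary): the walk above starts at FIRST_STMT,
   follows GROUP_NEXT_ELEMENT and accumulates GROUP_GAP of each visited
   element until STMT is reached.  Assuming a gap-free chain of stores
   S0 -> S1 -> S2 -> S3, the place returned for S2 is simply 2; gaps in the
   chain increase the returned place accordingly.  */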
237 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
238    they are of a valid type and that they match the defs of the first stmt of
239    the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
240    return -1, if the error could be corrected by swapping operands of the
241    operation return 1, if everything is ok return 0.  */
244 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
245                              gimple stmt, unsigned stmt_num,
246                              vec<slp_oprnd_info> *oprnds_info)
249   unsigned int i, number_of_oprnds;
252   enum vect_def_type dt = vect_uninitialized_def;
253   struct loop *loop = NULL;
254   bool pattern = false;
255   slp_oprnd_info oprnd_info;
256   int first_op_idx = 1;
257   bool commutative = false;
258   bool first_op_cond = false;
259   bool first = stmt_num == 0;
260   bool second = stmt_num == 1;
263     loop = LOOP_VINFO_LOOP (loop_vinfo);
265   if (is_gimple_call (stmt))
267       number_of_oprnds = gimple_call_num_args (stmt);
270   else if (is_gimple_assign (stmt))
272       enum tree_code code = gimple_assign_rhs_code (stmt);
273       number_of_oprnds = gimple_num_ops (stmt) - 1;
274       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
276           first_op_cond = true;
281       commutative = commutative_tree_code (code);
286 bool swapped
= false;
287 for (i
= 0; i
< number_of_oprnds
; i
++)
292 if (i
== 0 || i
== 1)
293 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
),
296 oprnd
= gimple_op (stmt
, first_op_idx
+ i
- 1);
299 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
301 oprnd_info
= (*oprnds_info
)[i
];
303 if (!vect_is_simple_use (oprnd
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
306 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
309 "Build SLP failed: can't analyze def for ");
310 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
311 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
317 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
318 from the pattern. Check that all the stmts of the node are in the
320 if (def_stmt
&& gimple_bb (def_stmt
)
321 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
322 || (!loop
&& gimple_bb (def_stmt
) == BB_VINFO_BB (bb_vinfo
)
323 && gimple_code (def_stmt
) != GIMPLE_PHI
))
324 && vinfo_for_stmt (def_stmt
)
325 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
326 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
327 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
330 if (!first
&& !oprnd_info
->first_pattern
331 /* Allow different pattern state for the defs of the
332 first stmt in reduction chains. */
333 && (oprnd_info
->first_dt
!= vect_reduction_def
334 || (!second
&& !oprnd_info
->second_pattern
)))
344 if (dump_enabled_p ())
346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
347 "Build SLP failed: some of the stmts"
348 " are in a pattern, and others are not ");
349 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
350 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
356 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
357 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
359 if (dt
== vect_unknown_def_type
)
361 if (dump_enabled_p ())
362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
363 "Unsupported pattern.\n");
367 switch (gimple_code (def_stmt
))
370 def
= gimple_phi_result (def_stmt
);
374 def
= gimple_assign_lhs (def_stmt
);
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
380 "unsupported defining stmt:\n");
386 oprnd_info
->second_pattern
= pattern
;
390 oprnd_info
->first_dt
= dt
;
391 oprnd_info
->first_pattern
= pattern
;
392 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
396 /* Not first stmt of the group, check that the def-stmt/s match
397 the def-stmt/s of the first stmt. Allow different definition
398 types for reduction chains: the first stmt must be a
399 vect_reduction_def (a phi node), and the rest
400 vect_internal_def. */
401 if (((oprnd_info
->first_dt
!= dt
402 && !(oprnd_info
->first_dt
== vect_reduction_def
403 && dt
== vect_internal_def
)
404 && !((oprnd_info
->first_dt
== vect_external_def
405 || oprnd_info
->first_dt
== vect_constant_def
)
406 && (dt
== vect_external_def
407 || dt
== vect_constant_def
)))
408 || !types_compatible_p (oprnd_info
->first_op_type
,
411 /* Try swapping operands if we got a mismatch. */
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
422 "Build SLP failed: different types\n");
428 /* Check the types of the definitions. */
431 case vect_constant_def
:
432 case vect_external_def
:
433 case vect_reduction_def
:
436 case vect_internal_def
:
437 oprnd_info
->def_stmts
.quick_push (def_stmt
);
441 /* FORNOW: Not supported. */
442 if (dump_enabled_p ())
444 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
445 "Build SLP failed: illegal type of def ");
446 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, def
);
447 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
459 tree cond
= gimple_assign_rhs1 (stmt
);
460 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
461 &TREE_OPERAND (cond
, 1));
462 TREE_SET_CODE (cond
, swap_tree_comparison (TREE_CODE (cond
)));
465 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
466 gimple_assign_rhs2_ptr (stmt
));
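/* Illustrative note (added commentary): the swapping above re-canonicalizes
   statements whose commutative operands were matched in the opposite order.
   For example, the group
       a0 = x0 + y0;
       a1 = y1 + x1;
   can still form one SLP node: the defs of the second statement are exchanged
   during matching, and the fix-up here applies the same swap to the GIMPLE IL
   so later operand lookups see a canonical order.  For a COND_EXPR the
   comparison operands are swapped and the comparison code is mirrored
   (swap_tree_comparison, e.g. '<' becomes '>').  */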
473 /* Verify if the scalar stmts STMTS are isomorphic, require data
474 permutation or are of unsupported types of operation. Return
475 true if they are, otherwise return false and indicate in *MATCHES
476 which stmts are not isomorphic to the first one. If MATCHES[0]
477 is false then this indicates the comparison could not be
478 carried out or the stmts will never be vectorized by SLP. */
481 vect_build_slp_tree_1 (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
482 vec
<gimple
> stmts
, unsigned int group_size
,
483 unsigned nops
, unsigned int *max_nunits
,
484 unsigned int vectorization_factor
, bool *matches
,
488 gimple first_stmt
= stmts
[0], stmt
= stmts
[0];
489 enum tree_code first_stmt_code
= ERROR_MARK
;
490 enum tree_code alt_stmt_code
= ERROR_MARK
;
491 enum tree_code rhs_code
= ERROR_MARK
;
492 enum tree_code first_cond_code
= ERROR_MARK
;
494 bool need_same_oprnds
= false;
495 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
498 machine_mode optab_op2_mode
;
499 machine_mode vec_mode
;
500 struct data_reference
*first_dr
;
502 gimple first_load
= NULL
, prev_first_load
= NULL
, old_first_load
= NULL
;
505 /* For every stmt in NODE find its def stmt/s. */
506 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
510 if (dump_enabled_p ())
512 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
513 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
514 dump_printf (MSG_NOTE
, "\n");
517 /* Fail to vectorize statements marked as unvectorizable. */
518 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
520 if (dump_enabled_p ())
522 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
523 "Build SLP failed: unvectorizable statement ");
524 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
525 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
527 /* Fatal mismatch. */
532 lhs
= gimple_get_lhs (stmt
);
533 if (lhs
== NULL_TREE
)
535 if (dump_enabled_p ())
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
538 "Build SLP failed: not GIMPLE_ASSIGN nor "
540 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
541 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
543 /* Fatal mismatch. */
548 if (is_gimple_assign (stmt
)
549 && gimple_assign_rhs_code (stmt
) == COND_EXPR
550 && (cond
= gimple_assign_rhs1 (stmt
))
551 && !COMPARISON_CLASS_P (cond
))
553 if (dump_enabled_p ())
555 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
556 "Build SLP failed: condition is not "
558 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
559 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
561 /* Fatal mismatch. */
566 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
567 vectype
= get_vectype_for_scalar_type (scalar_type
);
570 if (dump_enabled_p ())
572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
573 "Build SLP failed: unsupported data-type ");
574 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
576 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
578 /* Fatal mismatch. */
583 /* If populating the vector type requires unrolling then fail
584 before adjusting *max_nunits for basic-block vectorization. */
586 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
589 "Build SLP failed: unrolling required "
590 "in basic block SLP\n");
591 /* Fatal mismatch. */
596 /* In case of multiple types we need to detect the smallest type. */
597 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
599 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
601 vectorization_factor
= *max_nunits
;
604 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
606 rhs_code
= CALL_EXPR
;
607 if (gimple_call_internal_p (call_stmt
)
608 || gimple_call_tail_p (call_stmt
)
609 || gimple_call_noreturn_p (call_stmt
)
610 || !gimple_call_nothrow_p (call_stmt
)
611 || gimple_call_chain (call_stmt
))
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
616 "Build SLP failed: unsupported call type ");
617 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
619 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
621 /* Fatal mismatch. */
627 rhs_code
= gimple_assign_rhs_code (stmt
);
629 /* Check the operation. */
632 first_stmt_code
= rhs_code
;
634 /* Shift arguments should be equal in all the packed stmts for a
635 vector shift with scalar shift operand. */
636 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
637 || rhs_code
== LROTATE_EXPR
638 || rhs_code
== RROTATE_EXPR
)
640 vec_mode
= TYPE_MODE (vectype
);
642 /* First see if we have a vector/vector shift. */
643 optab
= optab_for_tree_code (rhs_code
, vectype
,
647 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
649 /* No vector/vector shift, try for a vector/scalar shift. */
650 optab
= optab_for_tree_code (rhs_code
, vectype
,
655 if (dump_enabled_p ())
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
657 "Build SLP failed: no optab.\n");
658 /* Fatal mismatch. */
662 icode
= (int) optab_handler (optab
, vec_mode
);
663 if (icode
== CODE_FOR_nothing
)
665 if (dump_enabled_p ())
666 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
668 "op not supported by target.\n");
669 /* Fatal mismatch. */
673 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
674 if (!VECTOR_MODE_P (optab_op2_mode
))
676 need_same_oprnds
= true;
677 first_op1
= gimple_assign_rhs2 (stmt
);
681 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
683 need_same_oprnds
= true;
684 first_op1
= gimple_assign_rhs2 (stmt
);
689 if (first_stmt_code
!= rhs_code
690 && alt_stmt_code
== ERROR_MARK
)
691 alt_stmt_code
= rhs_code
;
692 if (first_stmt_code
!= rhs_code
693 && (first_stmt_code
!= IMAGPART_EXPR
694 || rhs_code
!= REALPART_EXPR
)
695 && (first_stmt_code
!= REALPART_EXPR
696 || rhs_code
!= IMAGPART_EXPR
)
697 /* Handle mismatches in plus/minus by computing both
698 and merging the results. */
699 && !((first_stmt_code
== PLUS_EXPR
700 || first_stmt_code
== MINUS_EXPR
)
701 && (alt_stmt_code
== PLUS_EXPR
702 || alt_stmt_code
== MINUS_EXPR
)
703 && rhs_code
== alt_stmt_code
)
704 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
705 && (first_stmt_code
== ARRAY_REF
706 || first_stmt_code
== BIT_FIELD_REF
707 || first_stmt_code
== INDIRECT_REF
708 || first_stmt_code
== COMPONENT_REF
709 || first_stmt_code
== MEM_REF
)))
711 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
714 "Build SLP failed: different operation "
716 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
719 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
727 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
729 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
732 "Build SLP failed: different shift "
734 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
735 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
741 if (rhs_code
== CALL_EXPR
)
743 gimple first_stmt
= stmts
[0];
744 if (gimple_call_num_args (stmt
) != nops
745 || !operand_equal_p (gimple_call_fn (first_stmt
),
746 gimple_call_fn (stmt
), 0)
747 || gimple_call_fntype (first_stmt
)
748 != gimple_call_fntype (stmt
))
750 if (dump_enabled_p ())
752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
753 "Build SLP failed: different calls in ");
754 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
756 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
764 /* Grouped store or load. */
765 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
767 if (REFERENCE_CLASS_P (lhs
))
775 unsigned unrolling_factor
776 = least_common_multiple
777 (*max_nunits
, group_size
) / group_size
;
778 /* FORNOW: Check that there is no gap between the loads
779 and no gap between the groups when we need to load
780 multiple groups at once.
781 ??? We should enhance this to only disallow gaps
783 if ((unrolling_factor
> 1
784 && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
785 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
786 /* If the group is split up then GROUP_GAP
787 isn't correct here, nor is GROUP_FIRST_ELEMENT. */
788 || GROUP_SIZE (vinfo_for_stmt (stmt
)) > group_size
))
789 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
790 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
792 if (dump_enabled_p ())
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
795 "Build SLP failed: grouped "
797 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
799 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
801 /* Fatal mismatch. */
806 /* Check that the size of interleaved loads group is not
807 greater than the SLP group size. */
809 = vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
811 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
812 && ((GROUP_SIZE (vinfo_for_stmt (stmt
))
813 - GROUP_GAP (vinfo_for_stmt (stmt
)))
814 > ncopies
* group_size
))
816 if (dump_enabled_p ())
818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
819 "Build SLP failed: the number "
820 "of interleaved loads is greater than "
821 "the SLP group size ");
822 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
824 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
826 /* Fatal mismatch. */
831 old_first_load
= first_load
;
832 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
835 /* Check that there are no loads from different interleaving
836 chains in the same node. */
837 if (prev_first_load
!= first_load
)
839 if (dump_enabled_p ())
841 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
843 "Build SLP failed: different "
844 "interleaving chains in one node ");
845 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
847 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
854 prev_first_load
= first_load
;
856 /* In some cases a group of loads is just the same load
857 repeated N times. Only analyze its cost once. */
858 if (first_load
== stmt
&& old_first_load
!= first_load
)
860 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
861 if (vect_supportable_dr_alignment (first_dr
, false)
862 == dr_unaligned_unsupported
)
864 if (dump_enabled_p ())
866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
868 "Build SLP failed: unsupported "
870 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
872 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
874 /* Fatal mismatch. */
880 } /* Grouped access. */
883 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
885 /* Not grouped load. */
886 if (dump_enabled_p ())
888 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
889 "Build SLP failed: not grouped load ");
890 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
891 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
894 /* FORNOW: Not grouped loads are not supported. */
895 /* Fatal mismatch. */
900 /* Not memory operation. */
901 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
902 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
903 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
904 && rhs_code
!= CALL_EXPR
)
906 if (dump_enabled_p ())
908 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
909 "Build SLP failed: operation");
910 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
911 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
912 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
914 /* Fatal mismatch. */
919 if (rhs_code
== COND_EXPR
)
921 tree cond_expr
= gimple_assign_rhs1 (stmt
);
924 first_cond_code
= TREE_CODE (cond_expr
);
925 else if (first_cond_code
!= TREE_CODE (cond_expr
))
927 if (dump_enabled_p ())
929 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
930 "Build SLP failed: different"
932 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
934 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
945 for (i
= 0; i
< group_size
; ++i
)
949 /* If we allowed a two-operation SLP node verify the target can cope
950 with the permute we are going to use. */
951 if (alt_stmt_code
!= ERROR_MARK
952 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
955 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype
));
956 for (i
= 0; i
< TYPE_VECTOR_SUBPARTS (vectype
); ++i
)
959 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
960 sel
[i
] += TYPE_VECTOR_SUBPARTS (vectype
);
962 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
964 for (i
= 0; i
< group_size
; ++i
)
965 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
968 if (dump_enabled_p ())
970 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
971 "Build SLP failed: different operation "
973 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
975 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
977 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
983 *two_operators
= true;
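/* Illustrative note (added commentary): a "two operators" node covers groups
   that mix PLUS_EXPR and MINUS_EXPR, e.g.
       x0 = a0 - b0;  x1 = a1 + b1;  x2 = a2 - b2;  x3 = a3 + b3;
   Both a vector subtraction and a vector addition are generated and the final
   result is blended with a permutation whose selector picks lane i from the
   first result for the '-' lanes and lane i + nunits from the second result
   for the '+' lanes; can_vec_perm_p above checks that the target supports
   that selector.  */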
989 /* Recursively build an SLP tree starting from NODE.
990 Fail (and return a value not equal to zero) if def-stmts are not
991 isomorphic, require data permutation or are of unsupported types of
992 operation. Otherwise, return 0.
993 The value returned is the depth in the SLP tree where a mismatch
997 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
998 slp_tree
*node
, unsigned int group_size
,
999 unsigned int *max_nunits
,
1000 vec
<slp_tree
> *loads
,
1001 unsigned int vectorization_factor
,
1002 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1003 unsigned max_tree_size
)
1005 unsigned nops
, i
, this_tree_size
= 0;
1010 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
1011 if (is_gimple_call (stmt
))
1012 nops
= gimple_call_num_args (stmt
);
1013 else if (is_gimple_assign (stmt
))
1015 nops
= gimple_num_ops (stmt
) - 1;
1016 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1022 bool two_operators
= false;
1023 if (!vect_build_slp_tree_1 (loop_vinfo
, bb_vinfo
,
1024 SLP_TREE_SCALAR_STMTS (*node
), group_size
, nops
,
1025 max_nunits
, vectorization_factor
, matches
,
1028 SLP_TREE_TWO_OPERATORS (*node
) = two_operators
;
1030 /* If the SLP node is a load, terminate the recursion. */
1031 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1032 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1034 loads
->safe_push (*node
);
1038 /* Get at the operands, verifying they are compatible. */
1039 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1040 slp_oprnd_info oprnd_info
;
1041 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node
), i
, stmt
)
1043 switch (vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
,
1044 stmt
, i
, &oprnds_info
))
1050 vect_free_oprnd_info (oprnds_info
);
1057 for (i
= 0; i
< group_size
; ++i
)
1060 vect_free_oprnd_info (oprnds_info
);
1064 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
1066 /* Create SLP_TREE nodes for the definition node/s. */
1067 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1070 unsigned old_nloads
= loads
->length ();
1071 unsigned old_max_nunits
= *max_nunits
;
1073 if (oprnd_info
->first_dt
!= vect_internal_def
)
1076 if (++this_tree_size
> max_tree_size
)
1078 vect_free_oprnd_info (oprnds_info
);
1082 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1085 vect_free_oprnd_info (oprnds_info
);
1089 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
,
1090 group_size
, max_nunits
, loads
,
1091 vectorization_factor
, matches
,
1092 npermutes
, &this_tree_size
, max_tree_size
))
1094 /* If we have all children of child built up from scalars then just
1095 throw that away and build it up this node from scalars. */
1096 if (!SLP_TREE_CHILDREN (child
).is_empty ())
1099 slp_tree grandchild
;
1101 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1102 if (grandchild
!= NULL
)
1107 *max_nunits
= old_max_nunits
;
1108 loads
->truncate (old_nloads
);
1109 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1110 vect_free_slp_tree (grandchild
);
1111 SLP_TREE_CHILDREN (child
).truncate (0);
1113 dump_printf_loc (MSG_NOTE
, vect_location
,
1114 "Building parent vector operands from "
1115 "scalars instead\n");
1116 oprnd_info
->def_stmts
= vNULL
;
1117 vect_free_slp_tree (child
);
1118 SLP_TREE_CHILDREN (*node
).quick_push (NULL
);
1123 oprnd_info
->def_stmts
= vNULL
;
1124 SLP_TREE_CHILDREN (*node
).quick_push (child
);
1128 /* If the SLP build failed fatally and we analyze a basic-block
1129 simply treat nodes we fail to build as externally defined
1130 (and thus build vectors from the scalar defs).
1131 The cost model will reject outright expensive cases.
1132 ??? This doesn't treat cases where permutation ultimately
1133 fails (or we don't try permutation below). Ideally we'd
1134 even compute a permutation that will end up with the maximum
1138 /* ??? Rejecting patterns this way doesn't work. We'd have to
1139 do extra work to cancel the pattern so the uses see the
1141 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1144 slp_tree grandchild
;
1147 *max_nunits
= old_max_nunits
;
1148 loads
->truncate (old_nloads
);
1149 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1150 vect_free_slp_tree (grandchild
);
1151 SLP_TREE_CHILDREN (child
).truncate (0);
1153 dump_printf_loc (MSG_NOTE
, vect_location
,
1154 "Building vector operands from scalars\n");
1155 oprnd_info
->def_stmts
= vNULL
;
1156 vect_free_slp_tree (child
);
1157 SLP_TREE_CHILDREN (*node
).quick_push (NULL
);
1161 /* If the SLP build for operand zero failed and operand zero
1162 and one can be commutated try that for the scalar stmts
1163 that failed the match. */
1165 /* A first scalar stmt mismatch signals a fatal mismatch. */
1167 /* ??? For COND_EXPRs we can swap the comparison operands
1168 as well as the arms under some constraints. */
1170 && oprnds_info
[1]->first_dt
== vect_internal_def
1171 && is_gimple_assign (stmt
)
1172 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1173 && !SLP_TREE_TWO_OPERATORS (*node
)
1174 /* Do so only if the number of unsuccessful permutes was not more
1175 than a cut-off as re-trying the recursive match on
1176 possibly each level of the tree would expose exponential
1177 behavior.  */
1181 slp_tree grandchild
;
1184 *max_nunits
= old_max_nunits
;
1185 loads
->truncate (old_nloads
);
1186 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1187 vect_free_slp_tree (grandchild
);
1188 SLP_TREE_CHILDREN (child
).truncate (0);
1190 /* Swap mismatched definition stmts. */
1191 dump_printf_loc (MSG_NOTE
, vect_location
,
1192 "Re-trying with swapped operands of stmts ");
1193 for (j
= 0; j
< group_size
; ++j
)
1196 gimple tem
= oprnds_info
[0]->def_stmts
[j
];
1197 oprnds_info
[0]->def_stmts
[j
] = oprnds_info
[1]->def_stmts
[j
];
1198 oprnds_info
[1]->def_stmts
[j
] = tem
;
1199 dump_printf (MSG_NOTE
, "%d ", j
);
1201 dump_printf (MSG_NOTE
, "\n");
1202 /* And try again with scratch 'matches' ... */
1203 bool *tem
= XALLOCAVEC (bool, group_size
);
1204 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
,
1205 group_size
, max_nunits
, loads
,
1206 vectorization_factor
,
1207 tem
, npermutes
, &this_tree_size
,
1210 /* ... so if successful we can apply the operand swapping
1211 to the GIMPLE IL. This is necessary because for example
1212 vect_get_slp_defs uses operand indexes and thus expects
1213 canonical operand order. */
1214 for (j
= 0; j
< group_size
; ++j
)
1217 gimple stmt
= SLP_TREE_SCALAR_STMTS (*node
)[j
];
1218 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1219 gimple_assign_rhs2_ptr (stmt
));
1221 oprnd_info
->def_stmts
= vNULL
;
1222 SLP_TREE_CHILDREN (*node
).quick_push (child
);
1229 oprnd_info
->def_stmts
= vNULL
;
1230 vect_free_slp_tree (child
);
1231 vect_free_oprnd_info (oprnds_info
);
1236 *tree_size
+= this_tree_size
;
1238 vect_free_oprnd_info (oprnds_info
);
1242 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
1245 vect_print_slp_tree (int dump_kind, slp_tree node)
1254   dump_printf (dump_kind, "node ");
1255   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1257       dump_printf (dump_kind, "\n\tstmt %d ", i);
1258       dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1260   dump_printf (dump_kind, "\n");
1262   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1263     vect_print_slp_tree (dump_kind, child);
1267 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1268    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1269    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1270    stmts in NODE are to be marked.  */
1273 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1282   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1283     if (j < 0 || i == j)
1284       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1286   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1287     vect_mark_slp_stmts (child, mark, j);
1291 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1294 vect_mark_slp_stmts_relevant (slp_tree node)
1298   stmt_vec_info stmt_info;
1304   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1306       stmt_info = vinfo_for_stmt (stmt);
1307       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1308                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1309       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1312   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1313     vect_mark_slp_stmts_relevant (child);
1317 /* Rearrange the statements of NODE according to PERMUTATION.  */
1320 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1321                           vec<unsigned> permutation)
1324   vec<gimple> tmp_stmts;
1328   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1329     vect_slp_rearrange_stmts (child, group_size, permutation);
1331   gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1332   tmp_stmts.create (group_size);
1333   tmp_stmts.quick_grow_cleared (group_size);
1335   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1336     tmp_stmts[permutation[i]] = stmt;
1338   SLP_TREE_SCALAR_STMTS (node).release ();
1339   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
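/* Illustrative note (added commentary): the statement at index I is moved to
   slot PERMUTATION[I].  For example, with scalar stmts { s0, s1, s2, s3 } and
   permutation { 1, 0, 3, 2 } the node ends up holding { s1, s0, s3, s2 }.  */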
1343 /* Check if the required load permutations in the SLP instance
1344 SLP_INSTN are supported. */
1347 vect_supported_load_permutation_p (slp_instance slp_instn
)
1349 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1350 unsigned int i
, j
, k
, next
;
1353 gimple stmt
, load
, next_load
, first_load
;
1354 struct data_reference
*dr
;
1356 if (dump_enabled_p ())
1358 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1359 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1360 if (node
->load_permutation
.exists ())
1361 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1362 dump_printf (MSG_NOTE
, "%d ", next
);
1364 for (k
= 0; k
< group_size
; ++k
)
1365 dump_printf (MSG_NOTE
, "%d ", k
);
1366 dump_printf (MSG_NOTE
, "\n");
1369 /* In case of reduction every load permutation is allowed, since the order
1370 of the reduction statements is not important (as opposed to the case of
1371 grouped stores). The only condition we need to check is that all the
1372 load nodes are of the same size and have the same permutation (and then
1373 rearrange all the nodes of the SLP instance according to this
1374 permutation).  */
1376 /* Check that all the load nodes are of the same size. */
1377 /* ??? Can't we assert this? */
1378 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1379 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1382 node
= SLP_INSTANCE_TREE (slp_instn
);
1383 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1385 /* Reduction (there are no data-refs in the root).
1386 In reduction chain the order of the loads is important. */
1387 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1388 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1393 /* Compare all the permutation sequences to the first one. We know
1394 that at least one load is permuted. */
1395 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1396 if (!node
->load_permutation
.exists ())
1398 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1400 if (!load
->load_permutation
.exists ())
1402 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1403 if (lidx
!= node
->load_permutation
[j
])
1407 /* Check that the loads in the first sequence are different and there
1408 are no gaps between them. */
1409 load_index
= sbitmap_alloc (group_size
);
1410 bitmap_clear (load_index
);
1411 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1413 if (bitmap_bit_p (load_index
, lidx
))
1415 sbitmap_free (load_index
);
1418 bitmap_set_bit (load_index
, lidx
);
1420 for (i
= 0; i
< group_size
; i
++)
1421 if (!bitmap_bit_p (load_index
, i
))
1423 sbitmap_free (load_index
);
1426 sbitmap_free (load_index
);
1428 /* This permutation is valid for reduction. Since the order of the
1429 statements in the nodes is not important unless they are memory
1430 accesses, we can rearrange the statements in all the nodes
1431 according to the order of the loads. */
1432 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1433 node
->load_permutation
);
1435 /* We are done, no actual permutations need to be generated. */
1436 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1437 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1441 /* In basic block vectorization we allow any subchain of an interleaving
1442 chain.
1443 FORNOW: not supported in loop SLP because of realignment complications.  */
1444 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1446 /* Check whether the loads in an instance form a subchain and thus
1447 no permutation is necessary. */
1448 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1450 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1452 bool subchain_p
= true;
1454 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1456 if (j
!= 0 && next_load
!= load
)
1461 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1464 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1467 /* Verify the permutation can be generated. */
1469 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1470 1, slp_instn
, true))
1472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1474 "unsupported load permutation\n");
1480 /* Check that the alignment of the first load in every subchain, i.e.,
1481 the first statement in every load node, is supported.
1482 ??? This belongs in alignment checking. */
1483 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1485 first_load
= SLP_TREE_SCALAR_STMTS (node
)[0];
1486 if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1488 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1489 if (vect_supportable_dr_alignment (dr
, false)
1490 == dr_unaligned_unsupported
)
1492 if (dump_enabled_p ())
1494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1496 "unsupported unaligned load ");
1497 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1499 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1509 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1510 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1511 well (unless it's reduction). */
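/* Illustrative note (added commentary, describing the checks below under the
   stated FORNOW restriction): for GROUP_SIZE == 2 the accepted shape is a
   load permutation like { 0, 0 } in one load node and { 1, 1 } in another,
   i.e. each load node reads a single element of the chain GROUP_SIZE times
   and the nodes together cover all chain elements; anything else is
   rejected here.  */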
1512 if (SLP_INSTANCE_LOADS (slp_instn
).length () != group_size
)
1514 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1515 if (!node
->load_permutation
.exists ())
1518 load_index
= sbitmap_alloc (group_size
);
1519 bitmap_clear (load_index
);
1520 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1522 unsigned int lidx
= node
->load_permutation
[0];
1523 if (bitmap_bit_p (load_index
, lidx
))
1525 sbitmap_free (load_index
);
1528 bitmap_set_bit (load_index
, lidx
);
1529 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, k
)
1532 sbitmap_free (load_index
);
1536 for (i
= 0; i
< group_size
; i
++)
1537 if (!bitmap_bit_p (load_index
, i
))
1539 sbitmap_free (load_index
);
1542 sbitmap_free (load_index
);
1544 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1545 if (node
->load_permutation
.exists ()
1546 && !vect_transform_slp_perm_load
1548 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true))
1554 /* Find the last store in SLP INSTANCE.  */
1557 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1559   gimple last = NULL, stmt;
1561   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1563       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1564       if (is_pattern_stmt_p (stmt_vinfo))
1565         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1567         last = get_later_stmt (stmt, last);
1573 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1576 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1577 stmt_vector_for_cost
*prologue_cost_vec
,
1578 stmt_vector_for_cost
*body_cost_vec
,
1579 unsigned ncopies_for_cost
)
1584 stmt_vec_info stmt_info
;
1586 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1588 /* Recurse down the SLP tree. */
1589 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1591 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1592 body_cost_vec
, ncopies_for_cost
);
1594 /* Look at the first scalar stmt to determine the cost. */
1595 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1596 stmt_info
= vinfo_for_stmt (stmt
);
1597 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1599 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1600 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
1601 vect_uninitialized_def
,
1602 node
, prologue_cost_vec
, body_cost_vec
);
1606 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1607 vect_model_load_cost (stmt_info
, ncopies_for_cost
, false,
1608 node
, prologue_cost_vec
, body_cost_vec
);
1609 /* If the load is permuted record the cost for the permutation.
1610 ??? Loads from multiple chains are let through here only
1611 for a single special case involving complex numbers where
1612 in the end no permutation is necessary. */
1613 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, s
)
1614 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s
))
1615 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
))
1616 && vect_get_place_in_interleaving_chain
1617 (s
, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
)) != i
)
1619 record_stmt_cost (body_cost_vec
, group_size
, vec_perm
,
1620 stmt_info
, 0, vect_body
);
1627 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1628 stmt_info
, 0, vect_body
);
1629 if (SLP_TREE_TWO_OPERATORS (node
))
1631 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1632 stmt_info
, 0, vect_body
);
1633 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1634 stmt_info
, 0, vect_body
);
1638 /* Scan operands and account for prologue cost of constants/externals.
1639 ??? This over-estimates cost for multiple uses and should be
1641 lhs
= gimple_get_lhs (stmt
);
1642 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1644 tree def
, op
= gimple_op (stmt
, i
);
1646 enum vect_def_type dt
;
1647 if (!op
|| op
== lhs
)
1649 if (vect_is_simple_use (op
, NULL
, STMT_VINFO_LOOP_VINFO (stmt_info
),
1650 STMT_VINFO_BB_VINFO (stmt_info
),
1651 &def_stmt
, &def
, &dt
))
1653 /* Without looking at the actual initializer a vector of
1654 constants can be implemented as load from the constant pool.
1655 ??? We need to pass down stmt_info for a vector type
1656 even if it points to the wrong stmt. */
1657 if (dt
== vect_constant_def
)
1658 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1659 stmt_info
, 0, vect_prologue
);
1660 else if (dt
== vect_external_def
)
1661 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1662 stmt_info
, 0, vect_prologue
);
1667 /* Compute the cost for the SLP instance INSTANCE. */
1670 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1672 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1673 unsigned ncopies_for_cost
;
1674 stmt_info_for_cost
*si
;
1677 /* Calculate the number of vector stmts to create based on the unrolling
1678 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1679 GROUP_SIZE / NUNITS otherwise).  */
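/* Illustrative note (added commentary): NCOPIES_FOR_COST below is computed as
   least_common_multiple (nunits, group_size) / nunits.  E.g. with V4SI
   vectors (nunits == 4) and a group of 8 stmts this gives 2 vector stmts,
   while a group of 2 stmts gives 1 (the group only fills half a vector).  */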
1680 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1681 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1682 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1683 /* Adjust the group_size by the vectorization factor which is always one
1684 for basic-block vectorization. */
1685 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1686 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1687 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1688 /* For reductions look at a reduction operand in case the reduction
1689 operation is widening like DOT_PROD or SAD. */
1690 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1692 gimple stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1693 switch (gimple_assign_rhs_code (stmt
))
1697 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1698 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1703 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1705 prologue_cost_vec
.create (10);
1706 body_cost_vec
.create (10);
1707 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1708 &prologue_cost_vec
, &body_cost_vec
,
1711 /* Record the prologue costs, which were delayed until we were
1712 sure that SLP was successful. */
1713 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1715 struct _stmt_vec_info
*stmt_info
1716 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1717 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1718 si
->misalign
, vect_prologue
);
1721 /* Record the instance's instructions in the target cost model. */
1722 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1724 struct _stmt_vec_info
*stmt_info
1725 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1726 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1727 si
->misalign
, vect_body
);
1730 prologue_cost_vec
.release ();
1731 body_cost_vec
.release ();
1734 /* Analyze an SLP instance starting from a group of grouped stores. Call
1735 vect_build_slp_tree to build a tree of packed stmts if possible.
1736 Return FALSE if it's impossible to SLP any stmt in the loop. */
1739 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1740 gimple stmt
, unsigned max_tree_size
)
1742 slp_instance new_instance
;
1744 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1745 unsigned int unrolling_factor
= 1, nunits
;
1746 tree vectype
, scalar_type
= NULL_TREE
;
1748 unsigned int vectorization_factor
= 0;
1750 unsigned int max_nunits
= 0;
1751 vec
<slp_tree
> loads
;
1752 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1753 vec
<gimple
> scalar_stmts
;
1755 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1759 scalar_type
= TREE_TYPE (DR_REF (dr
));
1760 vectype
= get_vectype_for_scalar_type (scalar_type
);
1764 gcc_assert (loop_vinfo
);
1765 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1768 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1772 gcc_assert (loop_vinfo
);
1773 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1774 group_size
= LOOP_VINFO_REDUCTIONS (loop_vinfo
).length ();
1779 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1782 "Build SLP failed: unsupported data-type ");
1783 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1784 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1790 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1792 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1794 vectorization_factor
= nunits
;
1796 /* Calculate the unrolling factor. */
1797 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
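/* Illustrative note (added commentary): e.g. with nunits == 4 and
   group_size == 6 the unrolling factor is least_common_multiple (4, 6) / 6
   == 2, i.e. two copies of the group are needed to fill whole vectors; as
   checked right below, such unrolling is only possible in loop SLP, not in
   basic-block SLP.  */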
1798 if (unrolling_factor
!= 1 && !loop_vinfo
)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1802 "Build SLP failed: unrolling required in basic"
1808 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1809 scalar_stmts
.create (group_size
);
1811 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1813 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1816 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1817 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1818 scalar_stmts
.safe_push (
1819 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1821 scalar_stmts
.safe_push (next
);
1822 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1824 /* Mark the first element of the reduction chain as reduction to properly
1825 transform the node. In the reduction analysis phase only the last
1826 element of the chain is marked as reduction. */
1827 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1828 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1832 /* Collect reduction statements. */
1833 vec
<gimple
> reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1834 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1835 scalar_stmts
.safe_push (next
);
1838 node
= vect_create_new_slp_node (scalar_stmts
);
1840 loads
.create (group_size
);
1842 /* Build the tree for the SLP instance. */
1843 bool *matches
= XALLOCAVEC (bool, group_size
);
1844 unsigned npermutes
= 0;
1845 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1846 &max_nunits
, &loads
,
1847 vectorization_factor
, matches
, &npermutes
, NULL
,
1850 /* Calculate the unrolling factor based on the smallest type. */
1851 if (max_nunits
> nunits
)
1852 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1855 if (unrolling_factor
!= 1 && !loop_vinfo
)
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1859 "Build SLP failed: unrolling required in basic"
1861 vect_free_slp_tree (node
);
1866 /* Create a new SLP instance. */
1867 new_instance
= XNEW (struct _slp_instance
);
1868 SLP_INSTANCE_TREE (new_instance
) = node
;
1869 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1870 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1871 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1873 /* Compute the load permutation. */
1875 bool loads_permuted
= false;
1876 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1878 vec
<unsigned> load_permutation
;
1880 gimple load
, first_stmt
;
1881 bool this_load_permuted
= false;
1882 load_permutation
.create (group_size
);
1883 first_stmt
= GROUP_FIRST_ELEMENT
1884 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1885 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1888 = vect_get_place_in_interleaving_chain (load
, first_stmt
);
1889 gcc_assert (load_place
!= -1);
1890 if (load_place
!= j
)
1891 this_load_permuted
= true;
1892 load_permutation
.safe_push (load_place
);
1894 if (!this_load_permuted
)
1896 load_permutation
.release ();
1899 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1900 loads_permuted
= true;
1905 if (!vect_supported_load_permutation_p (new_instance
))
1907 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1910 "Build SLP failed: unsupported load "
1912 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1913 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1915 vect_free_slp_instance (new_instance
);
1922 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
).safe_push (new_instance
);
1924 BB_VINFO_SLP_INSTANCES (bb_vinfo
).safe_push (new_instance
);
1926 if (dump_enabled_p ())
1927 vect_print_slp_tree (MSG_NOTE
, node
);
1932 /* Failed to SLP. */
1933 /* Free the allocated memory. */
1934 vect_free_slp_tree (node
);
1941 /* Check if there are stmts in the loop that can be vectorized using SLP.
1942 Build SLP trees of packed scalar stmts if SLP is possible.  */
1945 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1946 unsigned max_tree_size
)
1949 vec
<gimple
> grouped_stores
;
1950 vec
<gimple
> reductions
= vNULL
;
1951 vec
<gimple
> reduc_chains
= vNULL
;
1952 gimple first_element
;
1955 if (dump_enabled_p ())
1956 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
1960 grouped_stores
= LOOP_VINFO_GROUPED_STORES (loop_vinfo
);
1961 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1962 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1965 grouped_stores
= BB_VINFO_GROUPED_STORES (bb_vinfo
);
1967 /* Find SLP sequences starting from groups of grouped stores. */
1968 FOR_EACH_VEC_ELT (grouped_stores
, i
, first_element
)
1969 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
,
1973 if (reduc_chains
.length () > 0)
1975 /* Find SLP sequences starting from reduction chains. */
1976 FOR_EACH_VEC_ELT (reduc_chains
, i
, first_element
)
1977 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
,
1983 /* Don't try to vectorize SLP reductions if reduction chain was
1988 /* Find SLP sequences starting from groups of reductions. */
1989 if (reductions
.length () > 1
1990 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, reductions
[0],
1998 /* For each possible SLP instance decide whether to SLP it and calculate overall
1999 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2000 least one instance. */
2003 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2005 unsigned int i
, unrolling_factor
= 1;
2006 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2007 slp_instance instance
;
2008 int decided_to_slp
= 0;
2010 if (dump_enabled_p ())
2011 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2014 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2016 /* FORNOW: SLP if you can. */
2017 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
2018 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2020 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2021 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2022 loop-based vectorization. Such stmts will be marked as HYBRID. */
2023 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2027 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2029 if (decided_to_slp
&& dump_enabled_p ())
2030 dump_printf_loc (MSG_NOTE
, vect_location
,
2031 "Decided to SLP %d instances. Unrolling factor %d\n",
2032 decided_to_slp
, unrolling_factor
);
2034 return (decided_to_slp
> 0);
2038 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2039 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2042 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2044 gimple stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2045 imm_use_iterator imm_iter
;
2047 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2049 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2050 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2053 /* Propagate hybrid down the SLP tree. */
2054 if (stype
== hybrid
)
2056 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2060 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2061 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2062 /* We always get the pattern stmt here, but for immediate
2063 uses we have to use the LHS of the original stmt. */
2064 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
2065 if (STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2066 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2067 if (TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
2068 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
2070 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2072 use_vinfo
= vinfo_for_stmt (use_stmt
);
2073 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2074 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2075 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2076 if (!STMT_SLP_TYPE (use_vinfo
)
2077 && (STMT_VINFO_RELEVANT (use_vinfo
)
2078 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2079 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2080 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2085 if (stype
== hybrid
)
2087 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2090 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2092 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2095 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2097 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2100 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2103 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2105 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2106 struct loop
*loopp
= (struct loop
*)wi
->info
;
2111 if (TREE_CODE (*tp
) == SSA_NAME
2112 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2114 gimple def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2115 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2116 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2118 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2121 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2123 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2131 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2134 /* If the stmt is in a SLP instance then this isn't a reason
2135 to mark use definitions in other SLP instances as hybrid. */
2136 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi
))) != loop_vect
)
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
		     "\n");

  /* First walk all pattern stmt in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
				      i, pure_slp);
    }
}
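
/* Example (hypothetical loop): a load that is pure SLP inside an SLP
   instance but whose result also feeds a statement that is only loop
   vectorized (e.g. a reduction update outside the instance) is marked
   hybrid by the walk above, so it is code generated both as part of the
   SLP instance and as a regular loop-vectorized stmt.  */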
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
	/* Free stmt_vec_info.  */
	free_stmt_vec_info (stmt);
    }

  vect_destroy_datarefs (NULL, bb_vinfo);
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (child))
      return false;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

      if (!vect_analyze_stmt (stmt, &dummy, node))
	return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_slp_analyze_operations ===\n");

  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  slp_instances.ordered_remove (i);
	}
      else
	{
	  /* Compute the costs of the SLP instance.  */
	  vect_analyze_slp_cost (instance, data);
	  i++;
	}
    }

  if (!slp_instances.length ())
    return false;

  return true;
}
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (gimple_code (use_stmt) == GIMPLE_PHI
		    || gimple_bb (use_stmt) != bb
		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
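
/* Example (hypothetical block): if the def of one scalar stmt in the group
   is also used by a statement outside the SLP instance (or in another
   basic block), LIFE for that stmt is set above and its cost is not added
   to SCALAR_COST, since that scalar stmt stays live even if we vectorize;
   this biases the comparison against vectorization accordingly.  */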
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
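
/* Worked example with made-up costs: scalar_cost == 12, vec_inside_cost == 6,
   vec_prologue_cost == 2 and vec_epilogue_cost == 0 give
   vec_outside_cost == 2 and 2 + 6 == 8 < 12, so vectorization is considered
   profitable; with vec_inside_cost == 10 the sum would reach 12 >= 12 and
   the function would return false.  */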
/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  unsigned n_stmts = 0;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  vect_pattern_recog (NULL, bb_vinfo);

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad data alignment in basic "
			 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  /* Mark all the statements that we do not want to vectorize.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
      if (STMT_SLP_TYPE (vinfo) != pure_slp)
	STMT_VINFO_VECTORIZABLE (vinfo) = false;
    }

  /* Analyze dependences.  At this point all stmts not participating in
     vectorization have to be marked.  Dependence analysis assumes
     that we either vectorize all SLP instances or none at all.  */
  if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data dependence "
			 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported alignment in basic "
			 "block.\n");
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
	  && !gimple_nop_p (stmt)
	  && gimple_code (stmt) != GIMPLE_LABEL)
	insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
	return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
	  || current_vector_size == 0)
	return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying analysis with "
			 "vector size %d\n", current_vector_size);
    }
}
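
/* Example (hypothetical target): if autovectorize_vector_sizes () returns
   the bitmask 16 | 8 and the first, autodetected attempt fails with
   current_vector_size == 16, the loop above clears that bit and retries
   with current_vector_size == 1 << floor_log2 (8) == 8 before giving up.  */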
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
			   vec<tree> *vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors,
			   int reduc_index)
{
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;
  gimple_seq ctor_seq = NULL;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index;
      op = gimple_op (stmt, op_num + 1);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
	 we need either neutral operands or the original operands.  See
	 get_initial_def_for_reduction() for details.  */
      switch (code)
	{
	  case WIDEN_SUM_EXPR:
	  case DOT_PROD_EXPR:
	  case SAD_EXPR:
	  case PLUS_EXPR:
	  case MINUS_EXPR:
	  case BIT_IOR_EXPR:
	  case BIT_XOR_EXPR:
	    if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
	      neutral_op = build_real (TREE_TYPE (op), dconst0);
	    else
	      neutral_op = build_int_cst (TREE_TYPE (op), 0);

	    break;

	  case MULT_EXPR:
	    if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
	      neutral_op = build_real (TREE_TYPE (op), dconst1);
	    else
	      neutral_op = build_int_cst (TREE_TYPE (op), 1);

	    break;

	  case BIT_AND_EXPR:
	    neutral_op = build_int_cst (TREE_TYPE (op), -1);
	    break;

	  /* For MIN/MAX we don't have an easy neutral operand but
	     the initial values can be used fine here.  Only for
	     a reduction chain we have to force a neutral element.  */
	  case MAX_EXPR:
	  case MIN_EXPR:
	    if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
	      neutral_op = NULL;
	    else
	      {
		def_stmt = SSA_NAME_DEF_STMT (op);
		loop = (gimple_bb (stmt))->loop_father;
		neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
						    loop_preheader_edge (loop));
	      }
	    break;

	  default:
	    gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
	    neutral_op = NULL;
	}
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  elts = XALLOCAVEC (tree, nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
	{
	  if (is_store)
	    op = gimple_assign_rhs1 (stmt);
	  else
	    {
	      switch (code)
		{
		  case COND_EXPR:
		    if (op_num == 0 || op_num == 1)
		      {
			tree cond = gimple_assign_rhs1 (stmt);
			op = TREE_OPERAND (cond, op_num);
		      }
		    else
		      {
			if (op_num == 2)
			  op = gimple_assign_rhs2 (stmt);
			else
			  op = gimple_assign_rhs3 (stmt);
		      }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

	  if (reduc_index != -1)
	    {
	      loop = (gimple_bb (stmt))->loop_father;
	      def_stmt = SSA_NAME_DEF_STMT (op);

	      gcc_assert (loop);

	      /* Get the def before the loop.  In reduction chain we have only
		 one initial value.  */
	      if ((j != (number_of_copies - 1)
		   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		       && i != 0))
		  && neutral_op)
		op = neutral_op;
	      else
		op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
					    loop_preheader_edge (loop));
	    }

	  /* Create 'vect_ = {op0,op1,...,opn}'.  */
	  number_of_places_left_in_vector--;
	  tree orig_op = op;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  op = fold_unary (VIEW_CONVERT_EXPR,
				   TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple init_stmt;
		  op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
		  init_stmt
		    = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
	    place_after_defs = true;

	  if (number_of_places_left_in_vector == 0)
	    {
	      number_of_places_left_in_vector = nunits;

	      if (constant_p)
		vec_cst = build_vector (vector_type, elts);
	      else
		{
		  vec<constructor_elt, va_gc> *v;
		  unsigned k;
		  vec_alloc (v, nunits);
		  for (k = 0; k < nunits; ++k)
		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
		  vec_cst = build_constructor (vector_type, v);
		}
	      tree init;
	      gimple_stmt_iterator gsi;
	      if (place_after_defs)
		{
		  gsi = gsi_for_stmt
			  (vect_find_last_scalar_stmt_in_slp (slp_node));
		  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
		}
	      else
		init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
	      if (ctor_seq != NULL)
		{
		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
							GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (init);
	      place_after_defs = false;
	    }
	}
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
	{
	  if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

	  vec_oprnds->quick_push (neutral_vec);
	}
      else
	{
	  for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
	    vec_oprnds->quick_push (vop);
	}
    }
}
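
/* Worked example for the NUMBER_OF_COPIES computation above (hypothetical
   values): with GROUP_SIZE == 2, NUNITS == 4 and NUMBER_OF_VECTORS == 1 we
   get number_of_copies == 4 * 1 / 2 == 2 and build {s1, s2, s1, s2}; with
   GROUP_SIZE == 8 and NUMBER_OF_VECTORS == 2 we get a single copy and build
   {s1, s2, s3, s4} and {s5, s6, s7, s8}.  */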
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds, int reduc_index)
{
  gimple first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
	{
	  child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	  gimple related
	    = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

	  if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
	      || (related
		  && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
	    {
	      /* The number of vector defs is determined by the number of
		 vector statements in the node from which we get those
		 statements.  */
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
	      vectorized_defs = true;
	      child_index++;
	    }
	}

      if (!vectorized_defs)
	{
	  if (i == 0)
	    {
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      /* Number of vector stmts was calculated according to LHS in
		 vect_schedule_slp_instance (), fix it by replacing LHS with
		 RHS, if necessary.  See vect_get_smallest_scalar_type () for
		 details.  */
	      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
					     &rhs_size_unit);
	      if (rhs_size_unit != lhs_size_unit)
		{
		  number_of_vects *= rhs_size_unit;
		  number_of_vects /= lhs_size_unit;
		}
	    }
	}

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
	 looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
	/* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects, reduc_index);

      vec_oprnds->quick_push (vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
	return;
    }
}
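
/* Example (hypothetical types) for the LHS/RHS fixup above: for a group of
   stmts like "char_dst = (char) int_src" the number of vector stmts was
   computed from the char LHS, so with lhs_size_unit == 1 and
   rhs_size_unit == 4 the int operand needs number_of_vects scaled by
   4 / 1 == 4 vector defs.  */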
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   scalar stmts in NODE.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
			   tree mask, int first_vec_indx, int second_vec_indx,
			   gimple_stmt_iterator *gsi, slp_tree node,
			   tree vectype, vec<tree> dr_chain,
			   int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
				       first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
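
/* Example (hypothetical numbers): with 4 vector stmts in NODE and
   NCOPIES == 2, STRIDE == 4 / 2 == 2; the first copy permutes
   DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX] into slot
   VECT_STMTS_COUNTER, the second copy uses the vectors STRIDE further down
   the chain and fills slot STRIDE + VECT_STMTS_COUNTER.  */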
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
		       int mask_nunits, bool only_one_vec, int index,
		       unsigned char *mask, int *current_mask_element,
		       bool *need_next_vector, int *number_of_mask_fixes,
		       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < 0)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "permutation requires past vector ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "permutation requires at least two vectors ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  /* The mask requires the next vector.  */
  while (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
	{
	  /* We either need the first vector too or have already moved to the
	     next vector. In both cases, this permutation needs three
	     vectors.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "permutation requires at "
			       "least three vectors ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return false;
	}

      /* We move to the next vector, dropping the first one and working with
	 the second and the third - we need to adjust the values of the mask
	 accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
	mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, int vf,
			      slp_instance slp_node_instance, bool analyze_only)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  machine_mode mode;

  mode = TYPE_MODE (vectype);

  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vect permute for ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
		(int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
		SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  scalar_index = 0;
  index = 0;
  vect_stmts_counter = 0;
  vec_index = 0;
  first_vec_index = vec_index++;
  if (only_one_vec)
    second_vec_index = first_vec_index;
  else
    second_vec_index = vec_index++;

  for (j = 0; j < unroll_factor; j++)
    {
      for (k = 0; k < group_size; k++)
	{
	  i = SLP_TREE_LOAD_PERMUTATION (node)[k];
	  first_mask_element = i + j * group_size;
	  if (!vect_get_mask_element (stmt, first_mask_element, 0,
				      nunits, only_one_vec, index,
				      mask, &current_mask_element,
				      &need_next_vector,
				      &number_of_mask_fixes, &mask_fixed,
				      &needs_first_vector))
	    return false;
	  gcc_assert (current_mask_element >= 0
		      && current_mask_element < 2 * nunits);
	  mask[index++] = current_mask_element;

	  if (index == nunits)
	    {
	      index = 0;
	      if (!can_vec_perm_p (mode, false, mask))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (i = 0; i < nunits; ++i)
			dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
				     mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  return false;
		}

	      if (!analyze_only)
		{
		  int l;
		  tree mask_vec, *mask_elts;
		  mask_elts = XALLOCAVEC (tree, nunits);
		  for (l = 0; l < nunits; ++l)
		    mask_elts[l] = build_int_cst (mask_element_type,
						  mask[l]);
		  mask_vec = build_vector (mask_type, mask_elts);

		  if (need_next_vector)
		    {
		      first_vec_index = second_vec_index;
		      second_vec_index = vec_index;
		    }

		  next_scalar_stmt
		    = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];

		  vect_create_mask_and_perm (stmt, next_scalar_stmt,
			   mask_vec, first_vec_index, second_vec_index,
			   gsi, node, vectype, dr_chain,
			   ncopies, vect_stmts_counter++);
		}
	    }
	}
    }

  return true;
}
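
/* Worked example for the index arithmetic above, following the a0b0c0...
   comment: the a's sit at load-permutation index 0 with GROUP_SIZE == 3, so
   first_mask_element == 0 + j * 3 yields 0, 3, 6, 9 as the unroll counter J
   grows; once an element would address past the two input vectors,
   vect_get_mask_element () shifts the accumulated mask down by NUNITS,
   which is how the illegal {6,9,9,9} from the comment becomes {2,5,5,5}.  */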
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree child;

  if (!node)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.
     Unless this is a SLP reduction in which case the number of vector
     stmts is equal to the number of vector stmts of the children.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    vec_stmts_size = (vectorization_factor * group_size) / nunits;

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode;
      gimple ostmt;
      unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
      bool allsame = true;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask[i] = 1;
	    allsame = false;
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask[i] = 0;
      if (!allsame)
	{
	  vec<gimple> v0, v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
	      for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  melts[l] = build_int_cst
		      (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
		}
	      tmask = build_vector (mvectype, melts);

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}
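
/* Example (hypothetical numbers) for the VEC_STMTS_SIZE computation above:
   with GROUP_SIZE == 2, a vectorization factor of 4 and NUNITS == 4,
   vec_stmts_size == (4 * 2) / 4 == 2 vector stmts are created for the node;
   for an SLP reduction the number is instead copied from the first child.  */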
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple stmt, new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, vf);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (loop_vinfo)
	vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);
	}
    }

  return is_store;
}
/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "------>SLPing statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
	{
	  vect_schedule_slp (NULL, bb_vinfo);
	  break;
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}