1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
33 #include "gimple-ssa.h"
34 #include "tree-phinodes.h"
35 #include "ssa-iterators.h"
36 #include "tree-ssanames.h"
37 #include "tree-pass.h"
40 #include "recog.h" /* FIXME: for insn_data */
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
45 /* Extract the location of the basic block in the source code.
46 Return the basic block location if succeed and NULL if not. */
49 find_bb_location (basic_block bb
)
52 gimple_stmt_iterator si
;
57 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
60 if (gimple_location (stmt
) != UNKNOWN_LOC
)
61 return gimple_location (stmt
);
68 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
71 vect_free_slp_tree (slp_tree node
)
79 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
80 vect_free_slp_tree (child
);
82 SLP_TREE_CHILDREN (node
).release ();
83 SLP_TREE_SCALAR_STMTS (node
).release ();
84 SLP_TREE_VEC_STMTS (node
).release ();
85 SLP_TREE_LOAD_PERMUTATION (node
).release ();
91 /* Free the memory allocated for the SLP instance. */
94 vect_free_slp_instance (slp_instance instance
)
96 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
97 SLP_INSTANCE_LOADS (instance
).release ();
98 SLP_INSTANCE_BODY_COST_VEC (instance
).release ();
103 /* Create an SLP node for SCALAR_STMTS. */
106 vect_create_new_slp_node (vec
<gimple
> scalar_stmts
)
109 gimple stmt
= scalar_stmts
[0];
112 if (is_gimple_call (stmt
))
113 nops
= gimple_call_num_args (stmt
);
114 else if (is_gimple_assign (stmt
))
116 nops
= gimple_num_ops (stmt
) - 1;
117 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
123 node
= XNEW (struct _slp_tree
);
124 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
125 SLP_TREE_VEC_STMTS (node
).create (0);
126 SLP_TREE_CHILDREN (node
).create (nops
);
127 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
133 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
135 static vec
<slp_oprnd_info
>
136 vect_create_oprnd_info (int nops
, int group_size
)
139 slp_oprnd_info oprnd_info
;
140 vec
<slp_oprnd_info
> oprnds_info
;
142 oprnds_info
.create (nops
);
143 for (i
= 0; i
< nops
; i
++)
145 oprnd_info
= XNEW (struct _slp_oprnd_info
);
146 oprnd_info
->def_stmts
.create (group_size
);
147 oprnd_info
->first_dt
= vect_uninitialized_def
;
148 oprnd_info
->first_op_type
= NULL_TREE
;
149 oprnd_info
->first_pattern
= false;
150 oprnds_info
.quick_push (oprnd_info
);
157 /* Free operands info. */
160 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
163 slp_oprnd_info oprnd_info
;
165 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
167 oprnd_info
->def_stmts
.release ();
168 XDELETE (oprnd_info
);
171 oprnds_info
.release ();
175 /* Find the place of the data-ref in STMT in the interleaving chain that starts
176 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
179 vect_get_place_in_interleaving_chain (gimple stmt
, gimple first_stmt
)
181 gimple next_stmt
= first_stmt
;
184 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
189 if (next_stmt
== stmt
)
192 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
200 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
201 they are of a valid type and that they match the defs of the first stmt of
202 the SLP group (stored in OPRNDS_INFO). */
205 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
206 gimple stmt
, bool first
,
207 vec
<slp_oprnd_info
> *oprnds_info
)
210 unsigned int i
, number_of_oprnds
;
213 enum vect_def_type dt
= vect_uninitialized_def
;
214 struct loop
*loop
= NULL
;
215 bool pattern
= false;
216 slp_oprnd_info oprnd_info
;
218 tree compare_rhs
= NULL_TREE
;
221 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
223 if (is_gimple_call (stmt
))
225 number_of_oprnds
= gimple_call_num_args (stmt
);
228 else if (is_gimple_assign (stmt
))
230 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
231 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
237 for (i
= 0; i
< number_of_oprnds
; i
++)
242 compare_rhs
= NULL_TREE
;
245 oprnd
= gimple_op (stmt
, op_idx
++);
247 oprnd_info
= (*oprnds_info
)[i
];
249 if (COMPARISON_CLASS_P (oprnd
))
251 compare_rhs
= TREE_OPERAND (oprnd
, 1);
252 oprnd
= TREE_OPERAND (oprnd
, 0);
255 if (!vect_is_simple_use (oprnd
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
257 || (!def_stmt
&& dt
!= vect_constant_def
))
259 if (dump_enabled_p ())
261 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
262 "Build SLP failed: can't find def for ");
263 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
264 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
270 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
271 from the pattern. Check that all the stmts of the node are in the
273 if (def_stmt
&& gimple_bb (def_stmt
)
274 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
275 || (!loop
&& gimple_bb (def_stmt
) == BB_VINFO_BB (bb_vinfo
)
276 && gimple_code (def_stmt
) != GIMPLE_PHI
))
277 && vinfo_for_stmt (def_stmt
)
278 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
279 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
280 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
283 if (!first
&& !oprnd_info
->first_pattern
)
285 if (dump_enabled_p ())
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
288 "Build SLP failed: some of the stmts"
289 " are in a pattern, and others are not ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
291 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
297 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
298 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
300 if (dt
== vect_unknown_def_type
)
302 if (dump_enabled_p ())
303 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
304 "Unsupported pattern.\n");
308 switch (gimple_code (def_stmt
))
311 def
= gimple_phi_result (def_stmt
);
315 def
= gimple_assign_lhs (def_stmt
);
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
321 "unsupported defining stmt:\n");
328 oprnd_info
->first_dt
= dt
;
329 oprnd_info
->first_pattern
= pattern
;
330 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
334 /* Not first stmt of the group, check that the def-stmt/s match
335 the def-stmt/s of the first stmt. Allow different definition
336 types for reduction chains: the first stmt must be a
337 vect_reduction_def (a phi node), and the rest
338 vect_internal_def. */
339 if (((oprnd_info
->first_dt
!= dt
340 && !(oprnd_info
->first_dt
== vect_reduction_def
341 && dt
== vect_internal_def
)
342 && !((oprnd_info
->first_dt
== vect_external_def
343 || oprnd_info
->first_dt
== vect_constant_def
)
344 && (dt
== vect_external_def
345 || dt
== vect_constant_def
)))
346 || !types_compatible_p (oprnd_info
->first_op_type
,
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
351 "Build SLP failed: different types\n");
357 /* Check the types of the definitions. */
360 case vect_constant_def
:
361 case vect_external_def
:
362 case vect_reduction_def
:
365 case vect_internal_def
:
366 oprnd_info
->def_stmts
.quick_push (def_stmt
);
370 /* FORNOW: Not supported. */
371 if (dump_enabled_p ())
373 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
374 "Build SLP failed: illegal type of def ");
375 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, def
);
376 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
387 /* Verify if the scalar stmts STMTS are isomorphic, require data
388 permutation or are of unsupported types of operation. Return
389 true if they are, otherwise return false and indicate in *MATCHES
390 which stmts are not isomorphic to the first one. If MATCHES[0]
391 is false then this indicates the comparison could not be
392 carried out or the stmts will never be vectorized by SLP. */
395 vect_build_slp_tree_1 (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
396 vec
<gimple
> stmts
, unsigned int group_size
,
397 unsigned nops
, unsigned int *max_nunits
,
398 unsigned int vectorization_factor
, bool *matches
)
401 gimple stmt
= stmts
[0];
402 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
403 enum tree_code first_cond_code
= ERROR_MARK
;
405 bool need_same_oprnds
= false;
406 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
409 enum machine_mode optab_op2_mode
;
410 enum machine_mode vec_mode
;
411 struct data_reference
*first_dr
;
413 gimple first_load
= NULL
, prev_first_load
= NULL
, old_first_load
= NULL
;
416 /* For every stmt in NODE find its def stmt/s. */
417 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
421 if (dump_enabled_p ())
423 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
424 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
425 dump_printf (MSG_NOTE
, "\n");
428 /* Fail to vectorize statements marked as unvectorizable. */
429 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
431 if (dump_enabled_p ())
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
434 "Build SLP failed: unvectorizable statement ");
435 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
436 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
438 /* Fatal mismatch. */
443 lhs
= gimple_get_lhs (stmt
);
444 if (lhs
== NULL_TREE
)
446 if (dump_enabled_p ())
448 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
449 "Build SLP failed: not GIMPLE_ASSIGN nor "
451 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
452 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
454 /* Fatal mismatch. */
459 if (is_gimple_assign (stmt
)
460 && gimple_assign_rhs_code (stmt
) == COND_EXPR
461 && (cond
= gimple_assign_rhs1 (stmt
))
462 && !COMPARISON_CLASS_P (cond
))
464 if (dump_enabled_p ())
466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
467 "Build SLP failed: condition is not "
469 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
470 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
472 /* Fatal mismatch. */
477 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
478 vectype
= get_vectype_for_scalar_type (scalar_type
);
481 if (dump_enabled_p ())
483 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
484 "Build SLP failed: unsupported data-type ");
485 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
487 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
489 /* Fatal mismatch. */
494 /* In case of multiple types we need to detect the smallest type. */
495 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
497 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
499 vectorization_factor
= *max_nunits
;
502 if (is_gimple_call (stmt
))
504 rhs_code
= CALL_EXPR
;
505 if (gimple_call_internal_p (stmt
)
506 || gimple_call_tail_p (stmt
)
507 || gimple_call_noreturn_p (stmt
)
508 || !gimple_call_nothrow_p (stmt
)
509 || gimple_call_chain (stmt
))
511 if (dump_enabled_p ())
513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
514 "Build SLP failed: unsupported call type ");
515 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
516 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
518 /* Fatal mismatch. */
524 rhs_code
= gimple_assign_rhs_code (stmt
);
526 /* Check the operation. */
529 first_stmt_code
= rhs_code
;
531 /* Shift arguments should be equal in all the packed stmts for a
532 vector shift with scalar shift operand. */
533 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
534 || rhs_code
== LROTATE_EXPR
535 || rhs_code
== RROTATE_EXPR
)
537 vec_mode
= TYPE_MODE (vectype
);
539 /* First see if we have a vector/vector shift. */
540 optab
= optab_for_tree_code (rhs_code
, vectype
,
544 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
546 /* No vector/vector shift, try for a vector/scalar shift. */
547 optab
= optab_for_tree_code (rhs_code
, vectype
,
552 if (dump_enabled_p ())
553 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
554 "Build SLP failed: no optab.\n");
555 /* Fatal mismatch. */
559 icode
= (int) optab_handler (optab
, vec_mode
);
560 if (icode
== CODE_FOR_nothing
)
562 if (dump_enabled_p ())
563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
565 "op not supported by target.\n");
566 /* Fatal mismatch. */
570 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
571 if (!VECTOR_MODE_P (optab_op2_mode
))
573 need_same_oprnds
= true;
574 first_op1
= gimple_assign_rhs2 (stmt
);
578 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
580 need_same_oprnds
= true;
581 first_op1
= gimple_assign_rhs2 (stmt
);
586 if (first_stmt_code
!= rhs_code
587 && (first_stmt_code
!= IMAGPART_EXPR
588 || rhs_code
!= REALPART_EXPR
)
589 && (first_stmt_code
!= REALPART_EXPR
590 || rhs_code
!= IMAGPART_EXPR
)
591 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
592 && (first_stmt_code
== ARRAY_REF
593 || first_stmt_code
== BIT_FIELD_REF
594 || first_stmt_code
== INDIRECT_REF
595 || first_stmt_code
== COMPONENT_REF
596 || first_stmt_code
== MEM_REF
)))
598 if (dump_enabled_p ())
600 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
601 "Build SLP failed: different operation "
603 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
604 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
611 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
616 "Build SLP failed: different shift "
618 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
619 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
625 if (rhs_code
== CALL_EXPR
)
627 gimple first_stmt
= stmts
[0];
628 if (gimple_call_num_args (stmt
) != nops
629 || !operand_equal_p (gimple_call_fn (first_stmt
),
630 gimple_call_fn (stmt
), 0)
631 || gimple_call_fntype (first_stmt
)
632 != gimple_call_fntype (stmt
))
634 if (dump_enabled_p ())
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
637 "Build SLP failed: different calls in ");
638 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
640 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
648 /* Grouped store or load. */
649 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
651 if (REFERENCE_CLASS_P (lhs
))
659 unsigned unrolling_factor
660 = least_common_multiple
661 (*max_nunits
, group_size
) / group_size
;
662 /* FORNOW: Check that there is no gap between the loads
663 and no gap between the groups when we need to load
664 multiple groups at once.
665 ??? We should enhance this to only disallow gaps
667 if ((unrolling_factor
> 1
668 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
669 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
670 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
671 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
673 if (dump_enabled_p ())
675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
676 "Build SLP failed: grouped "
678 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
680 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
682 /* Fatal mismatch. */
687 /* Check that the size of interleaved loads group is not
688 greater than the SLP group size. */
690 = vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
692 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
693 && ((GROUP_SIZE (vinfo_for_stmt (stmt
))
694 - GROUP_GAP (vinfo_for_stmt (stmt
)))
695 > ncopies
* group_size
))
697 if (dump_enabled_p ())
699 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
700 "Build SLP failed: the number "
701 "of interleaved loads is greater than "
702 "the SLP group size ");
703 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
705 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
707 /* Fatal mismatch. */
712 old_first_load
= first_load
;
713 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
716 /* Check that there are no loads from different interleaving
717 chains in the same node. */
718 if (prev_first_load
!= first_load
)
720 if (dump_enabled_p ())
722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
724 "Build SLP failed: different "
725 "interleaving chains in one node ");
726 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
728 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
735 prev_first_load
= first_load
;
737 /* In some cases a group of loads is just the same load
738 repeated N times. Only analyze its cost once. */
739 if (first_load
== stmt
&& old_first_load
!= first_load
)
741 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
742 if (vect_supportable_dr_alignment (first_dr
, false)
743 == dr_unaligned_unsupported
)
745 if (dump_enabled_p ())
747 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
749 "Build SLP failed: unsupported "
751 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
753 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
755 /* Fatal mismatch. */
761 } /* Grouped access. */
764 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
766 /* Not grouped load. */
767 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
770 "Build SLP failed: not grouped load ");
771 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
772 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
775 /* FORNOW: Not grouped loads are not supported. */
776 /* Fatal mismatch. */
781 /* Not memory operation. */
782 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
783 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
784 && rhs_code
!= COND_EXPR
785 && rhs_code
!= CALL_EXPR
)
787 if (dump_enabled_p ())
789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
790 "Build SLP failed: operation");
791 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
792 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
793 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
795 /* Fatal mismatch. */
800 if (rhs_code
== COND_EXPR
)
802 tree cond_expr
= gimple_assign_rhs1 (stmt
);
805 first_cond_code
= TREE_CODE (cond_expr
);
806 else if (first_cond_code
!= TREE_CODE (cond_expr
))
808 if (dump_enabled_p ())
810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
811 "Build SLP failed: different"
813 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
815 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
826 for (i
= 0; i
< group_size
; ++i
)
833 /* Recursively build an SLP tree starting from NODE.
834 Fail (and return a value not equal to zero) if def-stmts are not
835 isomorphic, require data permutation or are of unsupported types of
836 operation. Otherwise, return 0.
837 The value returned is the depth in the SLP tree where a mismatch
841 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
842 slp_tree
*node
, unsigned int group_size
,
843 unsigned int *max_nunits
,
844 vec
<slp_tree
> *loads
,
845 unsigned int vectorization_factor
,
846 bool *matches
, unsigned *npermutes
)
848 unsigned nops
, i
, this_npermutes
= 0;
852 matches
= XALLOCAVEC (bool, group_size
);
854 npermutes
= &this_npermutes
;
858 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
859 if (is_gimple_call (stmt
))
860 nops
= gimple_call_num_args (stmt
);
861 else if (is_gimple_assign (stmt
))
863 nops
= gimple_num_ops (stmt
) - 1;
864 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
870 if (!vect_build_slp_tree_1 (loop_vinfo
, bb_vinfo
,
871 SLP_TREE_SCALAR_STMTS (*node
), group_size
, nops
,
872 max_nunits
, vectorization_factor
, matches
))
875 /* If the SLP node is a load, terminate the recursion. */
876 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
877 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
879 loads
->safe_push (*node
);
883 /* Get at the operands, verifying they are compatible. */
884 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
885 slp_oprnd_info oprnd_info
;
886 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node
), i
, stmt
)
888 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
,
889 stmt
, (i
== 0), &oprnds_info
))
891 vect_free_oprnd_info (oprnds_info
);
896 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
898 /* Create SLP_TREE nodes for the definition node/s. */
899 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
902 unsigned old_nloads
= loads
->length ();
903 unsigned old_max_nunits
= *max_nunits
;
905 if (oprnd_info
->first_dt
!= vect_internal_def
)
908 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
911 vect_free_oprnd_info (oprnds_info
);
915 bool *matches
= XALLOCAVEC (bool, group_size
);
916 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
,
917 group_size
, max_nunits
, loads
,
918 vectorization_factor
, matches
, npermutes
))
920 oprnd_info
->def_stmts
= vNULL
;
921 SLP_TREE_CHILDREN (*node
).quick_push (child
);
925 /* If the SLP build for operand zero failed and operand zero
926 and one can be commutated try that for the scalar stmts
927 that failed the match. */
929 /* A first scalar stmt mismatch signals a fatal mismatch. */
931 /* ??? For COND_EXPRs we can swap the comparison operands
932 as well as the arms under some constraints. */
934 && oprnds_info
[1]->first_dt
== vect_internal_def
935 && is_gimple_assign (stmt
)
936 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
937 /* Do so only if the number of not successful permutes was nor more
938 than a cut-ff as re-trying the recursive match on
939 possibly each level of the tree would expose exponential
944 *max_nunits
= old_max_nunits
;
945 loads
->truncate (old_nloads
);
946 /* Swap mismatched definition stmts. */
947 for (unsigned j
= 0; j
< group_size
; ++j
)
950 gimple tem
= oprnds_info
[0]->def_stmts
[j
];
951 oprnds_info
[0]->def_stmts
[j
] = oprnds_info
[1]->def_stmts
[j
];
952 oprnds_info
[1]->def_stmts
[j
] = tem
;
954 /* And try again ... */
955 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
,
956 group_size
, max_nunits
, loads
,
957 vectorization_factor
,
960 oprnd_info
->def_stmts
= vNULL
;
961 SLP_TREE_CHILDREN (*node
).quick_push (child
);
968 oprnd_info
->def_stmts
= vNULL
;
969 vect_free_slp_tree (child
);
970 vect_free_oprnd_info (oprnds_info
);
974 vect_free_oprnd_info (oprnds_info
);
978 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
981 vect_print_slp_tree (int dump_kind
, slp_tree node
)
990 dump_printf (dump_kind
, "node ");
991 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
993 dump_printf (dump_kind
, "\n\tstmt %d ", i
);
994 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
996 dump_printf (dump_kind
, "\n");
998 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
999 vect_print_slp_tree (dump_kind
, child
);
1003 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1004 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1005 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1006 stmts in NODE are to be marked. */
1009 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1018 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1019 if (j
< 0 || i
== j
)
1020 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1023 vect_mark_slp_stmts (child
, mark
, j
);
1027 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1030 vect_mark_slp_stmts_relevant (slp_tree node
)
1034 stmt_vec_info stmt_info
;
1040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1042 stmt_info
= vinfo_for_stmt (stmt
);
1043 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1044 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1045 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1048 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1049 vect_mark_slp_stmts_relevant (child
);
1053 /* Rearrange the statements of NODE according to PERMUTATION. */
1056 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1057 vec
<unsigned> permutation
)
1060 vec
<gimple
> tmp_stmts
;
1064 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1065 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1067 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1068 tmp_stmts
.create (group_size
);
1069 tmp_stmts
.quick_grow_cleared (group_size
);
1071 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1072 tmp_stmts
[permutation
[i
]] = stmt
;
1074 SLP_TREE_SCALAR_STMTS (node
).release ();
1075 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1079 /* Check if the required load permutations in the SLP instance
1080 SLP_INSTN are supported. */
1083 vect_supported_load_permutation_p (slp_instance slp_instn
)
1085 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1086 unsigned int i
, j
, k
, next
;
1089 gimple stmt
, load
, next_load
, first_load
;
1090 struct data_reference
*dr
;
1092 if (dump_enabled_p ())
1094 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1095 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1096 if (node
->load_permutation
.exists ())
1097 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1098 dump_printf (MSG_NOTE
, "%d ", next
);
1100 for (i
= 0; i
< group_size
; ++i
)
1101 dump_printf (MSG_NOTE
, "%d ", i
);
1102 dump_printf (MSG_NOTE
, "\n");
1105 /* In case of reduction every load permutation is allowed, since the order
1106 of the reduction statements is not important (as opposed to the case of
1107 grouped stores). The only condition we need to check is that all the
1108 load nodes are of the same size and have the same permutation (and then
1109 rearrange all the nodes of the SLP instance according to this
1112 /* Check that all the load nodes are of the same size. */
1113 /* ??? Can't we assert this? */
1114 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1115 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1118 node
= SLP_INSTANCE_TREE (slp_instn
);
1119 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1121 /* Reduction (there are no data-refs in the root).
1122 In reduction chain the order of the loads is important. */
1123 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1124 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1129 /* Compare all the permutation sequences to the first one. We know
1130 that at least one load is permuted. */
1131 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1132 if (!node
->load_permutation
.exists ())
1134 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1136 if (!load
->load_permutation
.exists ())
1138 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1139 if (lidx
!= node
->load_permutation
[j
])
1143 /* Check that the loads in the first sequence are different and there
1144 are no gaps between them. */
1145 load_index
= sbitmap_alloc (group_size
);
1146 bitmap_clear (load_index
);
1147 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1149 if (bitmap_bit_p (load_index
, lidx
))
1151 sbitmap_free (load_index
);
1154 bitmap_set_bit (load_index
, lidx
);
1156 for (i
= 0; i
< group_size
; i
++)
1157 if (!bitmap_bit_p (load_index
, i
))
1159 sbitmap_free (load_index
);
1162 sbitmap_free (load_index
);
1164 /* This permutation is valid for reduction. Since the order of the
1165 statements in the nodes is not important unless they are memory
1166 accesses, we can rearrange the statements in all the nodes
1167 according to the order of the loads. */
1168 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1169 node
->load_permutation
);
1171 /* We are done, no actual permutations need to be generated. */
1172 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1173 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1177 /* In basic block vectorization we allow any subchain of an interleaving
1179 FORNOW: not supported in loop SLP because of realignment compications. */
1180 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1182 /* Check that for every node in the instance the loads
1184 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1189 if (j
!= 0 && next_load
!= load
)
1191 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1195 /* Check that the alignment of the first load in every subchain, i.e.,
1196 the first statement in every load node, is supported.
1197 ??? This belongs in alignment checking. */
1198 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1200 first_load
= SLP_TREE_SCALAR_STMTS (node
)[0];
1201 if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1203 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1204 if (vect_supportable_dr_alignment (dr
, false)
1205 == dr_unaligned_unsupported
)
1207 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1211 "unsupported unaligned load ");
1212 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1214 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1221 /* We are done, no actual permutations need to be generated. */
1222 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1223 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1227 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1228 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1229 well (unless it's reduction). */
1230 if (SLP_INSTANCE_LOADS (slp_instn
).length () != group_size
)
1232 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1233 if (!node
->load_permutation
.exists ())
1236 load_index
= sbitmap_alloc (group_size
);
1237 bitmap_clear (load_index
);
1238 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1240 unsigned int lidx
= node
->load_permutation
[0];
1241 if (bitmap_bit_p (load_index
, lidx
))
1243 sbitmap_free (load_index
);
1246 bitmap_set_bit (load_index
, lidx
);
1247 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, k
)
1250 sbitmap_free (load_index
);
1254 for (i
= 0; i
< group_size
; i
++)
1255 if (!bitmap_bit_p (load_index
, i
))
1257 sbitmap_free (load_index
);
1260 sbitmap_free (load_index
);
1262 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1263 if (node
->load_permutation
.exists ()
1264 && !vect_transform_slp_perm_load
1266 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true))
1272 /* Find the first load in the loop that belongs to INSTANCE.
1273 When loads are in several SLP nodes, there can be a case in which the first
1274 load does not appear in the first SLP node to be transformed, causing
1275 incorrect order of statements. Since we generate all the loads together,
1276 they must be inserted before the first load of the SLP instance and not
1277 before the first load of the first node of the instance. */
1280 vect_find_first_load_in_slp_instance (slp_instance instance
)
1284 gimple first_load
= NULL
, load
;
1286 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1287 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1288 first_load
= get_earlier_stmt (load
, first_load
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Visible logic: iterate the scalar stmts of the SLP instance root node and
   fold with get_later_stmt, yielding the latest store.  Presumably returns
   LAST_STORE (return line not visible in this chunk).  */
1294 /* Find the last store in SLP INSTANCE. */
1297 vect_find_last_store_in_slp_instance (slp_instance instance
)
1301 gimple last_store
= NULL
, store
;
1303 node
= SLP_INSTANCE_TREE (instance
);
1304 for (i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &store
); i
++)
1305 last_store
= get_later_stmt (store
, last_store
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Recursive per-node SLP cost worker: recurses over SLP_TREE_CHILDREN, then
   costs the node from its first scalar stmt — store cost, load cost (plus a
   vec_perm cost when the load order is permuted), or a generic vector_stmt
   cost — into the instance's body cost vector, and accounts prologue costs
   for constant/external operands via vect_is_simple_use.  Some intermediate
   lines (declarations, braces, an `else`) are missing from this view.  */
1310 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1313 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1314 slp_instance instance
, slp_tree node
,
1315 stmt_vector_for_cost
*prologue_cost_vec
,
1316 unsigned ncopies_for_cost
)
1318 stmt_vector_for_cost
*body_cost_vec
= &SLP_INSTANCE_BODY_COST_VEC (instance
);
1323 stmt_vec_info stmt_info
;
1325 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1327 /* Recurse down the SLP tree. */
1328 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1329 vect_analyze_slp_cost_1 (loop_vinfo
, bb_vinfo
,
1330 instance
, child
, prologue_cost_vec
,
1333 /* Look at the first scalar stmt to determine the cost. */
1334 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1335 stmt_info
= vinfo_for_stmt (stmt
);
1336 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1338 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1339 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
1340 vect_uninitialized_def
,
1341 node
, prologue_cost_vec
, body_cost_vec
);
1345 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1346 vect_model_load_cost (stmt_info
, ncopies_for_cost
, false,
1347 node
, prologue_cost_vec
, body_cost_vec
);
1348 /* If the load is permuted record the cost for the permutation.
1349 ??? Loads from multiple chains are let through here only
1350 for a single special case involving complex numbers where
1351 in the end no permutation is necessary. */
1352 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, s
)
1353 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s
))
1354 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
))
1355 && vect_get_place_in_interleaving_chain
1356 (s
, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
)) != i
)
1358 record_stmt_cost (body_cost_vec
, group_size
, vec_perm
,
1359 stmt_info
, 0, vect_body
);
/* NOTE(review): the non-grouped-access fallback — a plain vector_stmt body
   cost per copy.  The `else` introducing it is not visible here.  */
1365 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1366 stmt_info
, 0, vect_body
);
1368 /* Scan operands and account for prologue cost of constants/externals.
1369 ??? This over-estimates cost for multiple uses and should be
1371 lhs
= gimple_get_lhs (stmt
);
1372 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1374 tree def
, op
= gimple_op (stmt
, i
);
1376 enum vect_def_type dt
;
1377 if (!op
|| op
== lhs
)
1379 if (vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
,
1380 &def_stmt
, &def
, &dt
)
1381 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
1382 record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
1383 stmt_info
, 0, vect_prologue
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Driver for SLP instance costing: computes NCOPIES from the LCM of NUNITS
   and GROUP_SIZE, allocates prologue/body cost vectors, runs the recursive
   worker vect_analyze_slp_cost_1, then flushes the delayed prologue costs
   into the target cost model (loop or BB variant, whichever is non-null).  */
1387 /* Compute the cost for the SLP instance INSTANCE. */
1390 vect_analyze_slp_cost (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1391 slp_instance instance
, unsigned nunits
)
1393 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1394 unsigned ncopies_for_cost
;
1395 stmt_info_for_cost
*si
;
1398 /* Calculate the number of vector stmts to create based on the unrolling
1399 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1400 GROUP_SIZE / NUNITS otherwise. */
1401 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1402 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1404 prologue_cost_vec
.create (10);
1405 body_cost_vec
.create (10);
1406 SLP_INSTANCE_BODY_COST_VEC (instance
) = body_cost_vec
;
1407 vect_analyze_slp_cost_1 (loop_vinfo
, bb_vinfo
,
1408 instance
, SLP_INSTANCE_TREE (instance
),
1409 &prologue_cost_vec
, ncopies_for_cost
);
1411 /* Record the prologue costs, which were delayed until we were
1412 sure that SLP was successful. Unlike the body costs, we know
1413 the final values now regardless of the loop vectorization factor. */
1414 void *data
= (loop_vinfo
? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
)
1415 : BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
1416 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1418 struct _stmt_vec_info
*stmt_info
1419 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1420 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1421 si
->misalign
, vect_prologue
);
/* NOTE(review): prologue vector is released here; body_cost_vec ownership
   was transferred to the instance above and is freed with it.  */
1424 prologue_cost_vec
.release ();
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Builds one SLP instance from a grouped-store chain, a reduction chain, or
   a group of reductions: determines vectype/group size/unrolling factor,
   collects scalar stmts, calls vect_build_slp_tree, records the per-load
   permutations, validates them, costs the instance, and pushes it onto the
   loop or BB instance vector.  Many control-flow lines (if/else headers,
   braces, returns) are missing from this view — TODO confirm against the
   full file before editing logic.  */
1427 /* Analyze an SLP instance starting from a group of grouped stores. Call
1428 vect_build_slp_tree to build a tree of packed stmts if possible.
1429 Return FALSE if it's impossible to SLP any stmt in the loop. */
1432 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1435 slp_instance new_instance
;
1437 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1438 unsigned int unrolling_factor
= 1, nunits
;
1439 tree vectype
, scalar_type
= NULL_TREE
;
1441 unsigned int vectorization_factor
= 0;
1443 unsigned int max_nunits
= 0;
1444 vec
<slp_tree
> loads
;
1445 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1446 vec
<gimple
> scalar_stmts
;
/* NOTE(review): three entry kinds — grouped store/load (has first element),
   reduction chain, plain reduction group; each picks vectype + group size.  */
1448 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1452 scalar_type
= TREE_TYPE (DR_REF (dr
));
1453 vectype
= get_vectype_for_scalar_type (scalar_type
);
1457 gcc_assert (loop_vinfo
);
1458 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1461 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1465 gcc_assert (loop_vinfo
);
1466 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1467 group_size
= LOOP_VINFO_REDUCTIONS (loop_vinfo
).length ();
/* NOTE(review): failure path when no vectype could be determined.  */
1472 if (dump_enabled_p ())
1474 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1475 "Build SLP failed: unsupported data-type ");
1476 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1477 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1483 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1485 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1487 vectorization_factor
= nunits
;
1489 /* Calculate the unrolling factor. */
1490 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1491 if (unrolling_factor
!= 1 && !loop_vinfo
)
1493 if (dump_enabled_p ())
1494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1495 "Build SLP failed: unrolling required in basic"
1501 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1502 scalar_stmts
.create (group_size
);
1504 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1506 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1509 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1510 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1511 scalar_stmts
.safe_push (
1512 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1514 scalar_stmts
.safe_push (next
);
1515 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1520 /* Collect reduction statements. */
1521 vec
<gimple
> reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1522 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1523 scalar_stmts
.safe_push (next
);
1526 node
= vect_create_new_slp_node (scalar_stmts
);
1528 loads
.create (group_size
);
1530 /* Build the tree for the SLP instance. */
1531 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1532 &max_nunits
, &loads
,
1533 vectorization_factor
, NULL
, NULL
))
1535 /* Calculate the unrolling factor based on the smallest type. */
1536 if (max_nunits
> nunits
)
1537 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1540 if (unrolling_factor
!= 1 && !loop_vinfo
)
1542 if (dump_enabled_p ())
1543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1544 "Build SLP failed: unrolling required in basic"
1546 vect_free_slp_tree (node
);
1551 /* Create a new SLP instance. */
1552 new_instance
= XNEW (struct _slp_instance
);
1553 SLP_INSTANCE_TREE (new_instance
) = node
;
1554 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1555 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1556 SLP_INSTANCE_BODY_COST_VEC (new_instance
) = vNULL
;
1557 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1558 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1560 /* Compute the load permutation. */
1562 bool loads_permuted
= false;
1563 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1565 vec
<unsigned> load_permutation
;
1567 gimple load
, first_stmt
;
1568 bool this_load_permuted
= false;
1569 load_permutation
.create (group_size
);
1570 first_stmt
= GROUP_FIRST_ELEMENT
1571 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1572 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1575 = vect_get_place_in_interleaving_chain (load
, first_stmt
);
1576 gcc_assert (load_place
!= -1);
1577 if (load_place
!= j
)
1578 this_load_permuted
= true;
1579 load_permutation
.safe_push (load_place
);
1581 if (!this_load_permuted
)
1583 load_permutation
.release ();
1586 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1587 loads_permuted
= true;
/* NOTE(review): only permuted instances need the supported-permutation
   check; the guard using LOADS_PERMUTED is among the missing lines.  */
1592 if (!vect_supported_load_permutation_p (new_instance
))
1594 if (dump_enabled_p ())
1596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1597 "Build SLP failed: unsupported load "
1599 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1600 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1602 vect_free_slp_instance (new_instance
);
1606 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1607 = vect_find_first_load_in_slp_instance (new_instance
);
1610 /* Compute the costs of this SLP instance. */
1611 vect_analyze_slp_cost (loop_vinfo
, bb_vinfo
,
1612 new_instance
, TYPE_VECTOR_SUBPARTS (vectype
));
1615 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
).safe_push (new_instance
);
1617 BB_VINFO_SLP_INSTANCES (bb_vinfo
).safe_push (new_instance
);
1619 if (dump_enabled_p ())
1620 vect_print_slp_tree (MSG_NOTE
, node
);
1625 /* Failed to SLP. */
1626 /* Free the allocated memory. */
1627 vect_free_slp_tree (node
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Top-level SLP discovery: gathers grouped stores (and, for loops,
   reduction chains and reductions) from the vinfo, then tries to build an
   SLP instance from each seed.  For a BB vectorization run, failing on any
   store group aborts (the `ok` flag's assignment lines are missing here).  */
1634 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1635 trees of packed scalar stmts if SLP is possible. */
1638 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1641 vec
<gimple
> grouped_stores
;
1642 vec
<gimple
> reductions
= vNULL
;
1643 vec
<gimple
> reduc_chains
= vNULL
;
1644 gimple first_element
;
1647 if (dump_enabled_p ())
1648 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
1652 grouped_stores
= LOOP_VINFO_GROUPED_STORES (loop_vinfo
);
1653 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1654 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1657 grouped_stores
= BB_VINFO_GROUPED_STORES (bb_vinfo
);
1659 /* Find SLP sequences starting from groups of grouped stores. */
1660 FOR_EACH_VEC_ELT (grouped_stores
, i
, first_element
)
1661 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1664 if (bb_vinfo
&& !ok
)
1666 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1668 "Failed to SLP the basic block.\n");
/* NOTE(review): loop-only part below — the `if (loop_vinfo` half of this
   condition is among the lines missing from this view.  */
1674 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
).length () > 0)
1676 /* Find SLP sequences starting from reduction chains. */
1677 FOR_EACH_VEC_ELT (reduc_chains
, i
, first_element
)
1678 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1683 /* Don't try to vectorize SLP reductions if reduction chain was
1688 /* Find SLP sequences starting from groups of reductions. */
1689 if (loop_vinfo
&& LOOP_VINFO_REDUCTIONS (loop_vinfo
).length () > 1
1690 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, reductions
[0]))
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Decides (FORNOW: always) to SLP every instance, accumulates the maximum
   per-instance unrolling factor into the loop-wide SLP unrolling factor,
   and marks instance stmts pure_slp.  The increment of DECIDED_TO_SLP is
   among the missing lines.  Returns whether any instance was chosen.  */
1697 /* For each possible SLP instance decide whether to SLP it and calculate overall
1698 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1699 least one instance. */
1702 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1704 unsigned int i
, unrolling_factor
= 1;
1705 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1706 slp_instance instance
;
1707 int decided_to_slp
= 0;
1709 if (dump_enabled_p ())
1710 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
1713 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1715 /* FORNOW: SLP if you can. */
1716 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1717 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1719 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1720 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1721 loop-based vectorization. Such stmts will be marked as HYBRID. */
1722 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1726 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1728 if (decided_to_slp
&& dump_enabled_p ())
1729 dump_printf_loc (MSG_NOTE
, vect_location
,
1730 "Decided to SLP %d instances. Unrolling factor %d\n",
1731 decided_to_slp
, unrolling_factor
);
1733 return (decided_to_slp
> 0);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   For each pure-SLP stmt in NODE whose SSA def has an immediate use that is
   (a) inside the same loop/BB, (b) not itself SLPed, (c) relevant or part
   of a vectorizable cycle, and (d) not the reduction PHI itself, marks the
   node's stmt hybrid (needs both SLP and loop vectorization), then recurses
   into the children.  Note stmt_vinfo is deliberately reused for the use
   stmt inside the condition chain.  */
1737 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1738 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1741 vect_detect_hybrid_slp_stmts (slp_tree node
)
1744 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (node
);
1745 gimple stmt
= stmts
[0];
1746 imm_use_iterator imm_iter
;
1748 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1750 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1751 struct loop
*loop
= NULL
;
1752 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1753 basic_block bb
= NULL
;
1759 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1761 bb
= BB_VINFO_BB (bb_vinfo
);
1763 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1764 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1765 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1766 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1767 if (gimple_bb (use_stmt
)
1768 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1769 || bb
== gimple_bb (use_stmt
))
1770 && (stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1771 && !STMT_SLP_TYPE (stmt_vinfo
)
1772 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1773 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1774 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1775 && STMT_VINFO_DEF_TYPE (stmt_vinfo
)
1776 == vect_reduction_def
))
1777 vect_mark_slp_stmts (node
, hybrid
, i
);
1779 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1780 vect_detect_hybrid_slp_stmts (child
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Thin driver: runs the hybrid-SLP detection walk over the root of every
   SLP instance recorded in LOOP_VINFO.  */
1784 /* Find stmts that must be both vectorized and SLPed. */
1787 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1790 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1791 slp_instance instance
;
1793 if (dump_enabled_p ())
1794 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
1797 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1798 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Allocates a zeroed _bb_vec_info for BB, attaches a fresh stmt_vec_info
   (uid 0) to every stmt in the block, pre-sizes the grouped-stores and SLP
   instance vectors, and initializes the target cost model.  Presumably
   returns RES (return line not visible here).  */
1802 /* Create and initialize a new bb_vec_info struct for BB, as well as
1803 stmt_vec_info structs for all the stmts in it. */
1806 new_bb_vec_info (basic_block bb
)
1808 bb_vec_info res
= NULL
;
1809 gimple_stmt_iterator gsi
;
1811 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1812 BB_VINFO_BB (res
) = bb
;
1814 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1816 gimple stmt
= gsi_stmt (gsi
);
1817 gimple_set_uid (stmt
, 0);
1818 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1821 BB_VINFO_GROUPED_STORES (res
).create (10);
1822 BB_VINFO_SLP_INSTANCES (res
).create (2);
1823 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Tears down a bb_vec_info: frees every stmt's stmt_vec_info, the datarefs
   and dependence relations, the grouped-stores vector, each SLP instance
   and the instance vector, and the target cost-model data.  The final
   free of BB_VINFO itself is among the lines missing from this view.  */
1830 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1831 stmts in the basic block. */
1834 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1836 vec
<slp_instance
> slp_instances
;
1837 slp_instance instance
;
1839 gimple_stmt_iterator si
;
1845 bb
= BB_VINFO_BB (bb_vinfo
);
1847 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1849 gimple stmt
= gsi_stmt (si
);
1850 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1853 /* Free stmt_vec_info. */
1854 free_stmt_vec_info (stmt
);
1857 vect_destroy_datarefs (NULL
, bb_vinfo
);
1858 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1859 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
1860 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1861 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1862 vect_free_slp_instance (instance
);
1863 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
1864 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Post-order check that every stmt in the SLP subtree is analyzable:
   recurses into children first, then runs vect_analyze_stmt on each scalar
   stmt of NODE (all of which must already be pure-SLP).  The early returns
   on failure are among the missing lines.  */
1870 /* Analyze statements contained in SLP tree node after recursively analyzing
1871 the subtree. Return TRUE if the operations are supported. */
1874 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1884 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1885 if (!vect_slp_analyze_node_operations (bb_vinfo
, child
))
1888 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1890 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1891 gcc_assert (stmt_info
);
1892 gcc_assert (PURE_SLP_STMT (stmt_info
));
1894 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Filters the BB's SLP instances: any instance whose tree fails the node
   operation analysis is freed and removed in place (note the iterate loop
   intentionally does not advance i after ordered_remove).  Fails overall
   when no instance survives.  */
1902 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1903 operations are supported. */
1906 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1908 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1909 slp_instance instance
;
1912 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
1914 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1915 SLP_INSTANCE_TREE (instance
)))
1917 vect_free_slp_instance (instance
);
1918 slp_instances
.ordered_remove (i
);
1924 if (!slp_instances
.length ())
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Sums the scalar cost of NODE's stmts plus its children.  A stmt whose
   SSA def is used by a PHI, outside BB, or by a non-vectorizable stmt is
   considered live and skipped via LIFE (the lines that test/set (*life)[i]
   and `continue` are missing from this view).  Loads/stores/other stmts
   are priced through vect_get_stmt_cost.  Presumably returns SCALAR_COST.  */
1931 /* Compute the scalar cost of the SLP node NODE and its children
1932 and return it. Do not account defs that are marked in LIFE and
1933 update LIFE according to uses of NODE. */
1936 vect_bb_slp_scalar_cost (basic_block bb
,
1937 slp_tree node
, vec
<bool, va_heap
> *life
)
1939 unsigned scalar_cost
= 0;
1944 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1947 ssa_op_iter op_iter
;
1948 def_operand_p def_p
;
1949 stmt_vec_info stmt_info
;
1954 /* If there is a non-vectorized use of the defs then the scalar
1955 stmt is kept live in which case we do not account it or any
1956 required defs in the SLP children in the scalar cost. This
1957 way we make the vectorization more costly when compared to
1959 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
1961 imm_use_iterator use_iter
;
1963 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
1964 if (gimple_code (use_stmt
) == GIMPLE_PHI
1965 || gimple_bb (use_stmt
) != bb
1966 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt
)))
1969 BREAK_FROM_IMM_USE_STMT (use_iter
);
1975 stmt_info
= vinfo_for_stmt (stmt
);
1976 if (STMT_VINFO_DATA_REF (stmt_info
))
1978 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1979 stmt_cost
= vect_get_stmt_cost (scalar_load
);
1981 stmt_cost
= vect_get_stmt_cost (scalar_store
);
1984 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
1986 scalar_cost
+= stmt_cost
;
1989 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1990 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, life
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   BB cost model: feeds each instance's delayed body costs into the target
   cost model, sums the scalar cost via vect_bb_slp_scalar_cost (with a
   per-instance LIFE bitmap), finishes the target cost calculation, and
   rejects vectorization when vector cost (inside + outside) is not below
   the scalar cost.  The `return true/false` lines are not visible here.  */
1995 /* Check if vectorization of the basic block is profitable. */
1998 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2000 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2001 slp_instance instance
;
2003 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2004 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2005 void *target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
2006 stmt_vec_info stmt_info
= NULL
;
2007 stmt_vector_for_cost body_cost_vec
;
2008 stmt_info_for_cost
*ci
;
2010 /* Calculate vector costs. */
2011 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2013 body_cost_vec
= SLP_INSTANCE_BODY_COST_VEC (instance
);
2015 FOR_EACH_VEC_ELT (body_cost_vec
, j
, ci
)
2017 stmt_info
= ci
->stmt
? vinfo_for_stmt (ci
->stmt
) : NULL
;
2018 (void) add_stmt_cost (target_cost_data
, ci
->count
, ci
->kind
,
2019 stmt_info
, ci
->misalign
, vect_body
);
2023 /* Calculate scalar cost. */
2024 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2026 stack_vec
<bool, 20> life
;
2027 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2028 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2029 SLP_INSTANCE_TREE (instance
),
2033 /* Complete the target-specific cost calculation. */
2034 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2035 &vec_inside_cost
, &vec_epilogue_cost
);
2037 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2039 if (dump_enabled_p ())
2041 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2042 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2044 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2045 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2046 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2049 /* Vectorization is profitable if its cost is less than the cost of scalar
2051 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Single-attempt BB SLP analysis pipeline: build bb_vinfo, then in order —
   analyze data refs, require >= 2 datarefs, analyze accesses, run pattern
   recognition, check dependences, check alignment, build SLP instances,
   mark pure-SLP/relevant stmts, verify alignment, analyze operations, and
   finally apply the cost model.  Every failure path dumps a reason,
   destroys bb_vinfo and (presumably) returns NULL; success returns
   BB_VINFO.  Return lines are among the lines missing from this view.  */
2057 /* Check if the basic block can be vectorized. */
2060 vect_slp_analyze_bb_1 (basic_block bb
)
2062 bb_vec_info bb_vinfo
;
2063 vec
<slp_instance
> slp_instances
;
2064 slp_instance instance
;
2068 bb_vinfo
= new_bb_vec_info (bb
);
2072 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
2074 if (dump_enabled_p ())
2075 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2076 "not vectorized: unhandled data-ref in basic "
2079 destroy_bb_vec_info (bb_vinfo
);
2083 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2085 if (dump_enabled_p ())
2086 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2087 "not vectorized: not enough data-refs in "
2090 destroy_bb_vec_info (bb_vinfo
);
2094 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
2096 if (dump_enabled_p ())
2097 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2098 "not vectorized: unhandled data access in "
2101 destroy_bb_vec_info (bb_vinfo
);
2105 vect_pattern_recog (NULL
, bb_vinfo
);
2107 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo
))
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2111 "not vectorized: unhandled data dependence "
2112 "in basic block.\n");
2114 destroy_bb_vec_info (bb_vinfo
);
2118 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
2120 if (dump_enabled_p ())
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2122 "not vectorized: bad data alignment in basic "
2125 destroy_bb_vec_info (bb_vinfo
);
2129 /* Check the SLP opportunities in the basic block, analyze and build SLP
2131 if (!vect_analyze_slp (NULL
, bb_vinfo
))
2133 if (dump_enabled_p ())
2134 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2135 "not vectorized: failed to find SLP opportunities "
2136 "in basic block.\n");
2138 destroy_bb_vec_info (bb_vinfo
);
2142 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2144 /* Mark all the statements that we want to vectorize as pure SLP and
2146 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2148 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2149 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2152 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
2154 if (dump_enabled_p ())
2155 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2156 "not vectorized: unsupported alignment in basic "
2158 destroy_bb_vec_info (bb_vinfo
);
2162 if (!vect_slp_analyze_operations (bb_vinfo
))
2164 if (dump_enabled_p ())
2165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2166 "not vectorized: bad operation in basic block.\n");
2168 destroy_bb_vec_info (bb_vinfo
);
2172 /* Cost model: check if the vectorization is worthwhile. */
2173 if (!unlimited_cost_model ()
2174 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2178 "not vectorized: vectorization is not "
2181 destroy_bb_vec_info (bb_vinfo
);
2185 if (dump_enabled_p ())
2186 dump_printf_loc (MSG_NOTE
, vect_location
,
2187 "Basic block will be vectorized using SLP\n");
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Outer driver: counts the real (non-debug, non-nop, non-label) insns in
   BB, bails out when over PARAM_SLP_MAX_INSNS_IN_BB, then repeatedly tries
   vect_slp_analyze_bb_1, stepping current_vector_size down through the
   target's autovectorize_vector_sizes bitmask until success or exhaustion.
   The surrounding while-loop header, insn counter increment and returns
   are among the lines missing from this view.  */
2194 vect_slp_analyze_bb (basic_block bb
)
2196 bb_vec_info bb_vinfo
;
2198 gimple_stmt_iterator gsi
;
2199 unsigned int vector_sizes
;
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2204 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2206 gimple stmt
= gsi_stmt (gsi
);
2207 if (!is_gimple_debug (stmt
)
2208 && !gimple_nop_p (stmt
)
2209 && gimple_code (stmt
) != GIMPLE_LABEL
)
2213 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2217 "not vectorized: too many instructions in "
2223 /* Autodetect first vector size we try. */
2224 current_vector_size
= 0;
2225 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2229 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2233 destroy_bb_vec_info (bb_vinfo
);
2235 vector_sizes
&= ~current_vector_size
;
2236 if (vector_sizes
== 0
2237 || current_vector_size
== 0)
2240 /* Try the next biggest vector size. */
2241 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_NOTE
, vect_location
,
2244 "***** Re-trying analysis with "
2245 "vector size %d\n", current_vector_size
);
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   After the loop VF is known, scales each instance's delayed body costs by
   NCOPIES = VF / instance unrolling factor (costs are assumed linear in
   ncopies) and records them in the loop's target cost model.  The final
   add_stmt_cost argument (vect_body) and the body_cost_vec release are
   among the lines missing from this view.  */
2250 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2251 the number of created vector stmts depends on the unrolling factor).
2252 However, the actual number of vector stmts for every SLP node depends on
2253 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2254 should be updated. In this function we assume that the inside costs
2255 calculated in vect_model_xxx_cost are linear in ncopies. */
2258 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2260 unsigned int i
, j
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2261 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2262 slp_instance instance
;
2263 stmt_vector_for_cost body_cost_vec
;
2264 stmt_info_for_cost
*si
;
2265 void *data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_NOTE
, vect_location
,
2269 "=== vect_update_slp_costs_according_to_vf ===\n");
2271 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2273 /* We assume that costs are linear in ncopies. */
2274 int ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2276 /* Record the instance's instructions in the target cost model.
2277 This was delayed until here because the count of instructions
2278 isn't known beforehand. */
2279 body_cost_vec
= SLP_INSTANCE_BODY_COST_VEC (instance
);
2281 FOR_EACH_VEC_ELT (body_cost_vec
, j
, si
)
2282 (void) add_stmt_cost (data
, si
->count
* ncopies
, si
->kind
,
2283 vinfo_for_stmt (si
->stmt
), si
->misalign
,
/* NOTE(review): garbled extraction; code bytes preserved, comments only.
   Materializes vector defs for constant/invariant operands of an SLP node:
   for reductions picks a neutral element per operation (0 for sums, 1 for
   products, -1 for min/max via the preheader PHI arg — TODO confirm, the
   switch labels are partially missing), extracts the scalar operand from
   each stmt (assign rhs1/2/3, COND_EXPR operand, call arg, or shift count
   with an int->vector-element fold_convert fixup), fills vectors back to
   front, building either a VECTOR_CST (all constants) or a CONSTRUCTOR,
   emits any VIEW_CONVERT_EXPR fixup stmts before the first init, reverses
   the collected vectors into VEC_OPRNDS, and replicates (with the neutral
   vector when available) until NUMBER_OF_VECTORS entries exist.  Many
   control-flow lines (switch/case headers, braces, else branches) are
   missing from this view — do not edit logic without the full file.  */
2289 /* For constant and loop invariant defs of SLP_NODE this function returns
2290 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2291 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2292 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2293 REDUC_INDEX is the index of the reduction operand in the statements, unless
2297 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2298 vec
<tree
> *vec_oprnds
,
2299 unsigned int op_num
, unsigned int number_of_vectors
,
2302 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2303 gimple stmt
= stmts
[0];
2304 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2308 unsigned j
, number_of_places_left_in_vector
;
2311 int group_size
= stmts
.length ();
2312 unsigned int vec_num
, i
;
2313 unsigned number_of_copies
= 1;
2315 voprnds
.create (number_of_vectors
);
2316 bool constant_p
, is_store
;
2317 tree neutral_op
= NULL
;
2318 enum tree_code code
= gimple_expr_code (stmt
);
2321 gimple_seq ctor_seq
= NULL
;
2323 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2324 && reduc_index
!= -1)
2326 op_num
= reduc_index
- 1;
2327 op
= gimple_op (stmt
, reduc_index
);
2328 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2329 we need either neutral operands or the original operands. See
2330 get_initial_def_for_reduction() for details. */
2333 case WIDEN_SUM_EXPR
:
2339 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2340 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2342 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2347 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2348 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2350 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2355 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2360 def_stmt
= SSA_NAME_DEF_STMT (op
);
2361 loop
= (gimple_bb (stmt
))->loop_father
;
2362 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2363 loop_preheader_edge (loop
));
2371 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2374 op
= gimple_assign_rhs1 (stmt
);
2381 if (CONSTANT_CLASS_P (op
))
2386 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2387 gcc_assert (vector_type
);
2388 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2390 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2391 created vectors. It is greater than 1 if unrolling is performed.
2393 For example, we have two scalar operands, s1 and s2 (e.g., group of
2394 strided accesses of size two), while NUNITS is four (i.e., four scalars
2395 of this type can be packed in a vector). The output vector will contain
2396 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2399 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2400 containing the operands.
2402 For example, NUNITS is four as before, and the group size is 8
2403 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2404 {s5, s6, s7, s8}. */
2406 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2408 number_of_places_left_in_vector
= nunits
;
2409 elts
= XALLOCAVEC (tree
, nunits
);
2410 for (j
= 0; j
< number_of_copies
; j
++)
2412 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
2415 op
= gimple_assign_rhs1 (stmt
);
/* NOTE(review): operand extraction dispatch — the enclosing switch/case
   headers (COND_EXPR, call, shift, default) are among the missing lines.  */
2421 if (op_num
== 0 || op_num
== 1)
2423 tree cond
= gimple_assign_rhs1 (stmt
);
2424 op
= TREE_OPERAND (cond
, op_num
);
2429 op
= gimple_assign_rhs2 (stmt
);
2431 op
= gimple_assign_rhs3 (stmt
);
2436 op
= gimple_call_arg (stmt
, op_num
);
2443 op
= gimple_op (stmt
, op_num
+ 1);
2444 /* Unlike the other binary operators, shifts/rotates have
2445 the shift count being int, instead of the same type as
2446 the lhs, so make sure the scalar is the right type if
2447 we are dealing with vectors of
2448 long long/long/short/char. */
2449 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
2450 op
= fold_convert (TREE_TYPE (vector_type
), op
);
2454 op
= gimple_op (stmt
, op_num
+ 1);
2459 if (reduc_index
!= -1)
2461 loop
= (gimple_bb (stmt
))->loop_father
;
2462 def_stmt
= SSA_NAME_DEF_STMT (op
);
2466 /* Get the def before the loop. In reduction chain we have only
2467 one initial value. */
2468 if ((j
!= (number_of_copies
- 1)
2469 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2474 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2475 loop_preheader_edge (loop
));
2478 /* Create 'vect_ = {op0,op1,...,opn}'. */
2479 number_of_places_left_in_vector
--;
2480 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
2482 if (CONSTANT_CLASS_P (op
))
2484 op
= fold_unary (VIEW_CONVERT_EXPR
,
2485 TREE_TYPE (vector_type
), op
);
2486 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
2491 = make_ssa_name (TREE_TYPE (vector_type
), NULL
);
2493 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
2496 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
,
2497 new_temp
, op
, NULL_TREE
);
2498 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
2502 elts
[number_of_places_left_in_vector
] = op
;
2503 if (!CONSTANT_CLASS_P (op
))
2506 if (number_of_places_left_in_vector
== 0)
2508 number_of_places_left_in_vector
= nunits
;
2511 vec_cst
= build_vector (vector_type
, elts
);
2514 vec
<constructor_elt
, va_gc
> *v
;
2516 vec_alloc (v
, nunits
);
2517 for (k
= 0; k
< nunits
; ++k
)
2518 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
2519 vec_cst
= build_constructor (vector_type
, v
);
2521 voprnds
.quick_push (vect_init_vector (stmt
, vec_cst
,
2522 vector_type
, NULL
));
2523 if (ctor_seq
!= NULL
)
2525 gimple init_stmt
= SSA_NAME_DEF_STMT (voprnds
.last ());
2526 gimple_stmt_iterator gsi
= gsi_for_stmt (init_stmt
);
2527 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
2535 /* Since the vectors are created in the reverse order, we should invert
2537 vec_num
= voprnds
.length ();
2538 for (j
= vec_num
; j
!= 0; j
--)
2540 vop
= voprnds
[j
- 1];
2541 vec_oprnds
->quick_push (vop
);
2546 /* In case that VF is greater than the unrolling factor needed for the SLP
2547 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2548 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2549 to replicate the vectors. */
2550 while (number_of_vectors
> vec_oprnds
->length ())
2552 tree neutral_vec
= NULL
;
2557 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2559 vec_oprnds
->quick_push (neutral_vec
);
2563 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
2564 vec_oprnds
->quick_push (vop
);
2570 /* Get vectorized definitions from SLP_NODE that contains corresponding
2571 vectorized def-stmts. */
2574 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
2577 gimple vec_def_stmt
;
2580 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
2582 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2584 gcc_assert (vec_def_stmt
);
2585 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2586 vec_oprnds
->quick_push (vec_oprnd
);
2591 /* Get vectorized definitions for SLP_NODE.
2592 If the scalar definitions are loop invariants or constants, collect them and
2593 call vect_get_constant_vectors() to create vector stmts.
2594 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2595 must be stored in the corresponding child of SLP_NODE, and we call
2596 vect_get_slp_vect_defs () to retrieve them. */
2599 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
2600 vec
<vec
<tree
> > *vec_oprnds
, int reduc_index
)
2603 int number_of_vects
= 0, i
;
2604 unsigned int child_index
= 0;
2605 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2606 slp_tree child
= NULL
;
2609 bool vectorized_defs
;
2611 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
2612 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
2614 /* For each operand we check if it has vectorized definitions in a child
2615 node or we need to create them (for invariants and constants). We
2616 check if the LHS of the first stmt of the next child matches OPRND.
2617 If it does, we found the correct child. Otherwise, we call
2618 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2619 to check this child node for the next operand. */
2620 vectorized_defs
= false;
2621 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
2623 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
2625 /* We have to check both pattern and original def, if available. */
2626 gimple first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
2627 gimple related
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2629 if (operand_equal_p (oprnd
, gimple_get_lhs (first_def
), 0)
2631 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
2633 /* The number of vector defs is determined by the number of
2634 vector statements in the node from which we get those
2636 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2637 vectorized_defs
= true;
2642 if (!vectorized_defs
)
2646 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2647 /* Number of vector stmts was calculated according to LHS in
2648 vect_schedule_slp_instance (), fix it by replacing LHS with
2649 RHS, if necessary. See vect_get_smallest_scalar_type () for
2651 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2653 if (rhs_size_unit
!= lhs_size_unit
)
2655 number_of_vects
*= rhs_size_unit
;
2656 number_of_vects
/= lhs_size_unit
;
2661 /* Allocate memory for vectorized defs. */
2663 vec_defs
.create (number_of_vects
);
2665 /* For reduction defs we call vect_get_constant_vectors (), since we are
2666 looking for initial loop invariant values. */
2667 if (vectorized_defs
&& reduc_index
== -1)
2668 /* The defs are already vectorized. */
2669 vect_get_slp_vect_defs (child
, &vec_defs
);
2671 /* Build vectors from scalar defs. */
2672 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2673 number_of_vects
, reduc_index
);
2675 vec_oprnds
->quick_push (vec_defs
);
2677 /* For reductions, we only need initial values. */
2678 if (reduc_index
!= -1)
2684 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2685 building a vector of type MASK_TYPE from it) and two input vectors placed in
2686 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2687 shifting by STRIDE elements of DR_CHAIN for every copy.
2688 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2690 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2691 the created stmts must be inserted. */
2694 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2695 tree mask
, int first_vec_indx
, int second_vec_indx
,
2696 gimple_stmt_iterator
*gsi
, slp_tree node
,
2697 tree vectype
, vec
<tree
> dr_chain
,
2698 int ncopies
, int vect_stmts_counter
)
2701 gimple perm_stmt
= NULL
;
2702 stmt_vec_info next_stmt_info
;
2704 tree first_vec
, second_vec
, data_ref
;
2706 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2708 /* Initialize the vect stmts of NODE to properly insert the generated
2710 for (i
= SLP_TREE_VEC_STMTS (node
).length ();
2711 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2712 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
2714 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2715 for (i
= 0; i
< ncopies
; i
++)
2717 first_vec
= dr_chain
[first_vec_indx
];
2718 second_vec
= dr_chain
[second_vec_indx
];
2720 /* Generate the permute statement. */
2721 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, perm_dest
,
2722 first_vec
, second_vec
, mask
);
2723 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2724 gimple_set_lhs (perm_stmt
, data_ref
);
2725 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2727 /* Store the vector statement in NODE. */
2728 SLP_TREE_VEC_STMTS (node
)[stride
* i
+ vect_stmts_counter
] = perm_stmt
;
2730 first_vec_indx
+= stride
;
2731 second_vec_indx
+= stride
;
2734 /* Mark the scalar stmt as vectorized. */
2735 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2736 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2740 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2741 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2742 representation. Check that the mask is valid and return FALSE if not.
2743 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2744 the next vector, i.e., the current first vector is not needed. */
2747 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2748 int mask_nunits
, bool only_one_vec
, int index
,
2749 unsigned char *mask
, int *current_mask_element
,
2750 bool *need_next_vector
, int *number_of_mask_fixes
,
2751 bool *mask_fixed
, bool *needs_first_vector
)
2755 /* Convert to target specific representation. */
2756 *current_mask_element
= first_mask_element
+ m
;
2757 /* Adjust the value in case it's a mask for second and third vectors. */
2758 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2760 if (*current_mask_element
< mask_nunits
)
2761 *needs_first_vector
= true;
2763 /* We have only one input vector to permute but the mask accesses values in
2764 the next vector as well. */
2765 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2767 if (dump_enabled_p ())
2769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2770 "permutation requires at least two vectors ");
2771 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2772 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2778 /* The mask requires the next vector. */
2779 if (*current_mask_element
>= mask_nunits
* 2)
2781 if (*needs_first_vector
|| *mask_fixed
)
2783 /* We either need the first vector too or have already moved to the
2784 next vector. In both cases, this permutation needs three
2786 if (dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2789 "permutation requires at "
2790 "least three vectors ");
2791 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2792 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2798 /* We move to the next vector, dropping the first one and working with
2799 the second and the third - we need to adjust the values of the mask
2801 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2803 for (i
= 0; i
< index
; i
++)
2804 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2806 (*number_of_mask_fixes
)++;
2810 *need_next_vector
= *mask_fixed
;
2812 /* This was the last element of this mask. Start a new one. */
2813 if (index
== mask_nunits
- 1)
2815 *number_of_mask_fixes
= 1;
2816 *mask_fixed
= false;
2817 *needs_first_vector
= false;
2824 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2825 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2826 permute statements for the SLP node NODE of the SLP instance
2827 SLP_NODE_INSTANCE. */
2830 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
2831 gimple_stmt_iterator
*gsi
, int vf
,
2832 slp_instance slp_node_instance
, bool analyze_only
)
2834 gimple stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2835 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2836 tree mask_element_type
= NULL_TREE
, mask_type
;
2837 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2838 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2839 gimple next_scalar_stmt
;
2840 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2841 int first_mask_element
;
2842 int index
, unroll_factor
, current_mask_element
, ncopies
;
2843 unsigned char *mask
;
2844 bool only_one_vec
= false, need_next_vector
= false;
2845 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2846 int number_of_mask_fixes
= 1;
2847 bool mask_fixed
= false;
2848 bool needs_first_vector
= false;
2849 enum machine_mode mode
;
2851 mode
= TYPE_MODE (vectype
);
2853 if (!can_vec_perm_p (mode
, false, NULL
))
2855 if (dump_enabled_p ())
2857 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2858 "no vect permute for ");
2859 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2860 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2865 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2866 same size as the vector element being permuted. */
2867 mask_element_type
= lang_hooks
.types
.type_for_mode
2868 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
2869 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2870 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2871 mask
= XALLOCAVEC (unsigned char, nunits
);
2872 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2874 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2875 unrolling factor. */
2876 orig_vec_stmts_num
= group_size
*
2877 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2878 if (orig_vec_stmts_num
== 1)
2879 only_one_vec
= true;
2881 /* Number of copies is determined by the final vectorization factor
2882 relatively to SLP_NODE_INSTANCE unrolling factor. */
2883 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2885 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2888 /* Generate permutation masks for every NODE. Number of masks for each NODE
2889 is equal to GROUP_SIZE.
2890 E.g., we have a group of three nodes with three loads from the same
2891 location in each node, and the vector size is 4. I.e., we have a
2892 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2893 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2894 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2897 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2898 The last mask is illegal since we assume two operands for permute
2899 operation, and the mask element values can't be outside that range.
2900 Hence, the last mask must be converted into {2,5,5,5}.
2901 For the first two permutations we need the first and the second input
2902 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2903 we need the second and the third vectors: {b1,c1,a2,b2} and
2909 vect_stmts_counter
= 0;
2911 first_vec_index
= vec_index
++;
2913 second_vec_index
= first_vec_index
;
2915 second_vec_index
= vec_index
++;
2917 for (j
= 0; j
< unroll_factor
; j
++)
2919 for (k
= 0; k
< group_size
; k
++)
2921 i
= SLP_TREE_LOAD_PERMUTATION (node
)[k
];
2922 first_mask_element
= i
+ j
* group_size
;
2923 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2924 nunits
, only_one_vec
, index
,
2925 mask
, ¤t_mask_element
,
2927 &number_of_mask_fixes
, &mask_fixed
,
2928 &needs_first_vector
))
2930 mask
[index
++] = current_mask_element
;
2932 if (index
== nunits
)
2935 if (!can_vec_perm_p (mode
, false, mask
))
2937 if (dump_enabled_p ())
2939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
2941 "unsupported vect permute { ");
2942 for (i
= 0; i
< nunits
; ++i
)
2943 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ",
2945 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
2953 tree mask_vec
, *mask_elts
;
2954 mask_elts
= XALLOCAVEC (tree
, nunits
);
2955 for (l
= 0; l
< nunits
; ++l
)
2956 mask_elts
[l
] = build_int_cst (mask_element_type
,
2958 mask_vec
= build_vector (mask_type
, mask_elts
);
2960 if (need_next_vector
)
2962 first_vec_index
= second_vec_index
;
2963 second_vec_index
= vec_index
;
2967 = SLP_TREE_SCALAR_STMTS (node
)[scalar_index
++];
2969 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2970 mask_vec
, first_vec_index
, second_vec_index
,
2971 gsi
, node
, vectype
, dr_chain
,
2972 ncopies
, vect_stmts_counter
++);
2984 /* Vectorize SLP instance tree in postorder. */
2987 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2988 unsigned int vectorization_factor
)
2991 bool grouped_store
, is_store
;
2992 gimple_stmt_iterator si
;
2993 stmt_vec_info stmt_info
;
2994 unsigned int vec_stmts_size
, nunits
, group_size
;
3002 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3003 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3005 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3006 stmt_info
= vinfo_for_stmt (stmt
);
3008 /* VECTYPE is the type of the destination. */
3009 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3010 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3011 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3013 /* For each SLP instance calculate number of vector stmts to be created
3014 for the scalar stmts in each node of the SLP tree. Number of vector
3015 elements in one vector iteration is the number of scalar elements in
3016 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3018 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
3020 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3022 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3023 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3026 if (dump_enabled_p ())
3028 dump_printf_loc (MSG_NOTE
,vect_location
,
3029 "------>vectorizing SLP node starting from: ");
3030 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3031 dump_printf (MSG_NOTE
, "\n");
3034 /* Loads should be inserted before the first load. */
3035 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
3036 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3037 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
3038 && SLP_TREE_LOAD_PERMUTATION (node
).exists ())
3039 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
3040 else if (is_pattern_stmt_p (stmt_info
))
3041 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
3043 si
= gsi_for_stmt (stmt
);
3045 /* Stores should be inserted just before the last store. */
3046 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3047 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
3049 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
3050 if (is_pattern_stmt_p (vinfo_for_stmt (last_store
)))
3051 last_store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store
));
3052 si
= gsi_for_stmt (last_store
);
3055 /* Mark the first element of the reduction chain as reduction to properly
3056 transform the node. In the analysis phase only the last element of the
3057 chain is marked as reduction. */
3058 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3059 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3061 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3062 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3065 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3069 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3070 For loop vectorization this is done in vectorizable_call, but for SLP
3071 it needs to be deferred until end of vect_schedule_slp, because multiple
3072 SLP instances may refer to the same scalar stmt. */
3075 vect_remove_slp_scalar_calls (slp_tree node
)
3077 gimple stmt
, new_stmt
;
3078 gimple_stmt_iterator gsi
;
3082 stmt_vec_info stmt_info
;
3087 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3088 vect_remove_slp_scalar_calls (child
);
3090 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3092 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3094 stmt_info
= vinfo_for_stmt (stmt
);
3095 if (stmt_info
== NULL
3096 || is_pattern_stmt_p (stmt_info
)
3097 || !PURE_SLP_STMT (stmt_info
))
3099 lhs
= gimple_call_lhs (stmt
);
3100 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3101 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3102 set_vinfo_for_stmt (stmt
, NULL
);
3103 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3104 gsi
= gsi_for_stmt (stmt
);
3105 gsi_replace (&gsi
, new_stmt
, false);
3106 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3110 /* Generate vector code for all SLP instances in the loop/basic block. */
3113 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
3115 vec
<slp_instance
> slp_instances
;
3116 slp_instance instance
;
3118 bool is_store
= false;
3122 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3123 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3127 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
3131 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3133 /* Schedule the tree of INSTANCE. */
3134 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3136 if (dump_enabled_p ())
3137 dump_printf_loc (MSG_NOTE
, vect_location
,
3138 "vectorizing stmts using SLP.\n");
3141 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3143 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3146 gimple_stmt_iterator gsi
;
3148 /* Remove scalar call stmts. Do not do this for basic-block
3149 vectorization as not all uses may be vectorized.
3150 ??? Why should this be necessary? DCE should be able to
3151 remove the stmts itself.
3152 ??? For BB vectorization we can as well remove scalar
3153 stmts starting from the SLP tree root if they have no
3156 vect_remove_slp_scalar_calls (root
);
3158 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3159 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3161 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3164 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3165 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3166 /* Free the attached stmt_vec_info and remove the stmt. */
3167 gsi
= gsi_for_stmt (store
);
3168 unlink_stmt_vdef (store
);
3169 gsi_remove (&gsi
, true);
3170 release_defs (store
);
3171 free_stmt_vec_info (store
);
3179 /* Vectorize the basic block. */
3182 vect_slp_transform_bb (basic_block bb
)
3184 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3185 gimple_stmt_iterator si
;
3187 gcc_assert (bb_vinfo
);
3189 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB\n");
3192 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3194 gimple stmt
= gsi_stmt (si
);
3195 stmt_vec_info stmt_info
;
3197 if (dump_enabled_p ())
3199 dump_printf_loc (MSG_NOTE
, vect_location
,
3200 "------>SLPing statement: ");
3201 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3202 dump_printf (MSG_NOTE
, "\n");
3205 stmt_info
= vinfo_for_stmt (stmt
);
3206 gcc_assert (stmt_info
);
3208 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3209 if (STMT_SLP_TYPE (stmt_info
))
3211 vect_schedule_slp (NULL
, bb_vinfo
);
3216 if (dump_enabled_p ())
3217 dump_printf_loc (MSG_NOTE
, vect_location
,
3218 "BASIC BLOCK VECTORIZED\n");
3220 destroy_bb_vec_info (bb_vinfo
);