1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.

14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.

19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if it succeeds and NULL if not. */
47 find_bb_location (basic_block bb
)
50 gimple_stmt_iterator si
;
55 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
58 if (gimple_location (stmt
) != UNKNOWN_LOC
)
59 return gimple_location (stmt
);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
69 vect_free_slp_tree (slp_tree node
)
77 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
78 vect_free_slp_tree ((slp_tree
) child
);
80 VEC_free (slp_void_p
, heap
, SLP_TREE_CHILDREN (node
));
81 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
83 if (SLP_TREE_VEC_STMTS (node
))
84 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
90 /* Free the memory allocated for the SLP instance. */
93 vect_free_slp_instance (slp_instance instance
)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
96 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
97 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
101 /* Create an SLP node for SCALAR_STMTS. */
104 vect_create_new_slp_node (VEC (gimple
, heap
) *scalar_stmts
)
107 gimple stmt
= VEC_index (gimple
, scalar_stmts
, 0);
110 if (is_gimple_call (stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (is_gimple_assign (stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
121 node
= XNEW (struct _slp_tree
);
122 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
123 SLP_TREE_VEC_STMTS (node
) = NULL
;
124 SLP_TREE_CHILDREN (node
) = VEC_alloc (slp_void_p
, heap
, nops
);
125 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
126 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
132 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134 static VEC (slp_oprnd_info
, heap
) *
135 vect_create_oprnd_info (int nops
, int group_size
)
138 slp_oprnd_info oprnd_info
;
139 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
141 oprnds_info
= VEC_alloc (slp_oprnd_info
, heap
, nops
);
142 for (i
= 0; i
< nops
; i
++)
144 oprnd_info
= XNEW (struct _slp_oprnd_info
);
145 oprnd_info
->def_stmts
= VEC_alloc (gimple
, heap
, group_size
);
146 oprnd_info
->first_dt
= vect_uninitialized_def
;
147 oprnd_info
->first_def_type
= NULL_TREE
;
148 oprnd_info
->first_const_oprnd
= NULL_TREE
;
149 oprnd_info
->first_pattern
= false;
150 VEC_quick_push (slp_oprnd_info
, oprnds_info
, oprnd_info
);
157 /* Free operands info. */
160 vect_free_oprnd_info (VEC (slp_oprnd_info
, heap
) **oprnds_info
)
163 slp_oprnd_info oprnd_info
;
165 FOR_EACH_VEC_ELT (slp_oprnd_info
, *oprnds_info
, i
, oprnd_info
)
167 VEC_free (gimple
, heap
, oprnd_info
->def_stmts
);
168 XDELETE (oprnd_info
);
171 VEC_free (slp_oprnd_info
, heap
, *oprnds_info
);
175 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176 they are of a valid type and that they match the defs of the first stmt of
177 the SLP group (stored in OPRNDS_INFO). */
180 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
181 slp_tree slp_node
, gimple stmt
,
182 int ncopies_for_cost
, bool first
,
183 VEC (slp_oprnd_info
, heap
) **oprnds_info
)
186 unsigned int i
, number_of_oprnds
;
187 tree def
, def_op0
= NULL_TREE
;
189 enum vect_def_type dt
= vect_uninitialized_def
;
190 enum vect_def_type dt_op0
= vect_uninitialized_def
;
191 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
192 tree lhs
= gimple_get_lhs (stmt
);
193 struct loop
*loop
= NULL
;
194 enum tree_code rhs_code
;
195 bool different_types
= false;
196 bool pattern
= false;
197 slp_oprnd_info oprnd_info
, oprnd0_info
, oprnd1_info
;
199 tree compare_rhs
= NULL_TREE
;
202 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
204 if (is_gimple_call (stmt
))
206 number_of_oprnds
= gimple_call_num_args (stmt
);
209 else if (is_gimple_assign (stmt
))
211 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
212 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
218 for (i
= 0; i
< number_of_oprnds
; i
++)
223 compare_rhs
= NULL_TREE
;
226 oprnd
= gimple_op (stmt
, op_idx
++);
228 oprnd_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, i
);
230 if (COMPARISON_CLASS_P (oprnd
))
232 compare_rhs
= TREE_OPERAND (oprnd
, 1);
233 oprnd
= TREE_OPERAND (oprnd
, 0);
236 if (!vect_is_simple_use (oprnd
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
238 || (!def_stmt
&& dt
!= vect_constant_def
))
240 if (vect_print_dump_info (REPORT_SLP
))
242 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
243 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
249 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250 from the pattern.  Check that all the stmts of the node are in the
251 same pattern.
252 if (def_stmt
&& gimple_bb (def_stmt
)
253 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
254 || (!loop
&& gimple_bb (def_stmt
) == BB_VINFO_BB (bb_vinfo
)
255 && gimple_code (def_stmt
) != GIMPLE_PHI
))
256 && vinfo_for_stmt (def_stmt
)
257 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
258 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
259 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
262 if (!first
&& !oprnd_info
->first_pattern
)
264 if (vect_print_dump_info (REPORT_DETAILS
))
266 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
267 " are in a pattern, and others are not ");
268 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
274 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
275 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
277 if (dt
== vect_unknown_def_type
)
279 if (vect_print_dump_info (REPORT_DETAILS
))
280 fprintf (vect_dump
, "Unsupported pattern.");
284 switch (gimple_code (def_stmt
))
287 def
= gimple_phi_result (def_stmt
);
291 def
= gimple_assign_lhs (def_stmt
);
295 if (vect_print_dump_info (REPORT_DETAILS
))
296 fprintf (vect_dump
, "unsupported defining stmt: ");
303 oprnd_info
->first_dt
= dt
;
304 oprnd_info
->first_pattern
= pattern
;
307 oprnd_info
->first_def_type
= TREE_TYPE (def
);
308 oprnd_info
->first_const_oprnd
= NULL_TREE
;
312 oprnd_info
->first_def_type
= NULL_TREE
;
313 oprnd_info
->first_const_oprnd
= oprnd
;
320 /* Analyze costs (for the first stmt of the group only). */
321 if (REFERENCE_CLASS_P (lhs
))
323 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
327 enum vect_def_type dts
[2];
329 dts
[1] = vect_uninitialized_def
;
330 /* Not memory operation (we don't call this function for
332 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, dts
,
339 /* Not first stmt of the group, check that the def-stmt/s match
340 the def-stmt/s of the first stmt. Allow different definition
341 types for reduction chains: the first stmt must be a
342 vect_reduction_def (a phi node), and the rest
343 vect_internal_def. */
344 if (((oprnd_info
->first_dt
!= dt
345 && !(oprnd_info
->first_dt
== vect_reduction_def
346 && dt
== vect_internal_def
))
347 || (oprnd_info
->first_def_type
!= NULL_TREE
349 && !types_compatible_p (oprnd_info
->first_def_type
,
352 && !types_compatible_p (TREE_TYPE (oprnd_info
->first_const_oprnd
),
356 if (number_of_oprnds
!= 2)
358 if (vect_print_dump_info (REPORT_SLP
))
359 fprintf (vect_dump
, "Build SLP failed: different types ");
364 /* Try to swap operands in case of binary operation. */
366 different_types
= true;
369 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
370 if (is_gimple_assign (stmt
)
371 && (rhs_code
= gimple_assign_rhs_code (stmt
))
372 && TREE_CODE_CLASS (rhs_code
) == tcc_binary
373 && commutative_tree_code (rhs_code
)
374 && oprnd0_info
->first_dt
== dt
375 && oprnd_info
->first_dt
== dt_op0
377 && !(oprnd0_info
->first_def_type
378 && !types_compatible_p (oprnd0_info
->first_def_type
,
380 && !(oprnd_info
->first_def_type
381 && !types_compatible_p (oprnd_info
->first_def_type
,
382 TREE_TYPE (def_op0
))))
384 if (vect_print_dump_info (REPORT_SLP
))
386 fprintf (vect_dump
, "Swapping operands of ");
387 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
390 swap_tree_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
391 gimple_assign_rhs2_ptr (stmt
));
395 if (vect_print_dump_info (REPORT_SLP
))
396 fprintf (vect_dump
, "Build SLP failed: different types ");
404 /* Check the types of the definitions. */
407 case vect_constant_def
:
408 case vect_external_def
:
409 case vect_reduction_def
:
412 case vect_internal_def
:
415 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
416 oprnd1_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
418 VEC_quick_push (gimple
, oprnd1_info
->def_stmts
, def_stmt
);
420 VEC_quick_push (gimple
, oprnd0_info
->def_stmts
, def_stmt
);
423 VEC_quick_push (gimple
, oprnd_info
->def_stmts
, def_stmt
);
428 /* FORNOW: Not supported. */
429 if (vect_print_dump_info (REPORT_SLP
))
431 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
432 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
443 /* Recursively build an SLP tree starting from NODE.
444 Fail (and return FALSE) if def-stmts are not isomorphic, require data
445 permutation or are of unsupported types of operation. Otherwise, return
449 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
450 slp_tree
*node
, unsigned int group_size
,
451 int *inside_cost
, int *outside_cost
,
452 int ncopies_for_cost
, unsigned int *max_nunits
,
453 VEC (int, heap
) **load_permutation
,
454 VEC (slp_tree
, heap
) **loads
,
455 unsigned int vectorization_factor
, bool *loads_permuted
)
458 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
459 gimple stmt
= VEC_index (gimple
, stmts
, 0);
460 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
461 enum tree_code first_cond_code
= ERROR_MARK
;
463 bool stop_recursion
= false, need_same_oprnds
= false;
464 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
465 unsigned int ncopies
;
468 enum machine_mode optab_op2_mode
;
469 enum machine_mode vec_mode
;
470 struct data_reference
*first_dr
;
472 bool permutation
= false;
473 unsigned int load_place
;
474 gimple first_load
, prev_first_load
= NULL
;
475 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
477 slp_oprnd_info oprnd_info
;
480 if (is_gimple_call (stmt
))
481 nops
= gimple_call_num_args (stmt
);
482 else if (is_gimple_assign (stmt
))
484 nops
= gimple_num_ops (stmt
) - 1;
485 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
491 oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
493 /* For every stmt in NODE find its def stmt/s. */
494 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
496 if (vect_print_dump_info (REPORT_SLP
))
498 fprintf (vect_dump
, "Build SLP for ");
499 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
502 /* Fail to vectorize statements marked as unvectorizable. */
503 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
505 if (vect_print_dump_info (REPORT_SLP
))
508 "Build SLP failed: unvectorizable statement ");
509 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
512 vect_free_oprnd_info (&oprnds_info
);
516 lhs
= gimple_get_lhs (stmt
);
517 if (lhs
== NULL_TREE
)
519 if (vect_print_dump_info (REPORT_SLP
))
522 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
523 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
526 vect_free_oprnd_info (&oprnds_info
);
530 if (is_gimple_assign (stmt
)
531 && gimple_assign_rhs_code (stmt
) == COND_EXPR
532 && (cond
= gimple_assign_rhs1 (stmt
))
533 && !COMPARISON_CLASS_P (cond
))
535 if (vect_print_dump_info (REPORT_SLP
))
538 "Build SLP failed: condition is not comparison ");
539 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
542 vect_free_oprnd_info (&oprnds_info
);
546 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
547 vectype
= get_vectype_for_scalar_type (scalar_type
);
550 if (vect_print_dump_info (REPORT_SLP
))
552 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
553 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
556 vect_free_oprnd_info (&oprnds_info
);
560 /* In case of multiple types we need to detect the smallest type. */
561 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
563 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
565 vectorization_factor
= *max_nunits
;
568 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
570 if (is_gimple_call (stmt
))
572 rhs_code
= CALL_EXPR
;
573 if (gimple_call_internal_p (stmt
)
574 || gimple_call_tail_p (stmt
)
575 || gimple_call_noreturn_p (stmt
)
576 || !gimple_call_nothrow_p (stmt
)
577 || gimple_call_chain (stmt
))
579 if (vect_print_dump_info (REPORT_SLP
))
582 "Build SLP failed: unsupported call type ");
583 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
586 vect_free_oprnd_info (&oprnds_info
);
591 rhs_code
= gimple_assign_rhs_code (stmt
);
593 /* Check the operation. */
596 first_stmt_code
= rhs_code
;
598 /* Shift arguments should be equal in all the packed stmts for a
599 vector shift with scalar shift operand. */
600 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
601 || rhs_code
== LROTATE_EXPR
602 || rhs_code
== RROTATE_EXPR
)
604 vec_mode
= TYPE_MODE (vectype
);
606 /* First see if we have a vector/vector shift. */
607 optab
= optab_for_tree_code (rhs_code
, vectype
,
611 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
613 /* No vector/vector shift, try for a vector/scalar shift. */
614 optab
= optab_for_tree_code (rhs_code
, vectype
,
619 if (vect_print_dump_info (REPORT_SLP
))
620 fprintf (vect_dump
, "Build SLP failed: no optab.");
621 vect_free_oprnd_info (&oprnds_info
);
624 icode
= (int) optab_handler (optab
, vec_mode
);
625 if (icode
== CODE_FOR_nothing
)
627 if (vect_print_dump_info (REPORT_SLP
))
628 fprintf (vect_dump
, "Build SLP failed: "
629 "op not supported by target.");
630 vect_free_oprnd_info (&oprnds_info
);
633 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
634 if (!VECTOR_MODE_P (optab_op2_mode
))
636 need_same_oprnds
= true;
637 first_op1
= gimple_assign_rhs2 (stmt
);
641 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
643 need_same_oprnds
= true;
644 first_op1
= gimple_assign_rhs2 (stmt
);
649 if (first_stmt_code
!= rhs_code
650 && (first_stmt_code
!= IMAGPART_EXPR
651 || rhs_code
!= REALPART_EXPR
)
652 && (first_stmt_code
!= REALPART_EXPR
653 || rhs_code
!= IMAGPART_EXPR
)
654 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
))
655 && (first_stmt_code
== ARRAY_REF
656 || first_stmt_code
== INDIRECT_REF
657 || first_stmt_code
== COMPONENT_REF
658 || first_stmt_code
== MEM_REF
)))
660 if (vect_print_dump_info (REPORT_SLP
))
663 "Build SLP failed: different operation in stmt ");
664 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
667 vect_free_oprnd_info (&oprnds_info
);
672 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
674 if (vect_print_dump_info (REPORT_SLP
))
677 "Build SLP failed: different shift arguments in ");
678 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
681 vect_free_oprnd_info (&oprnds_info
);
685 if (rhs_code
== CALL_EXPR
)
687 gimple first_stmt
= VEC_index (gimple
, stmts
, 0);
688 if (gimple_call_num_args (stmt
) != nops
689 || !operand_equal_p (gimple_call_fn (first_stmt
),
690 gimple_call_fn (stmt
), 0)
691 || gimple_call_fntype (first_stmt
)
692 != gimple_call_fntype (stmt
))
694 if (vect_print_dump_info (REPORT_SLP
))
697 "Build SLP failed: different calls in ");
698 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
701 vect_free_oprnd_info (&oprnds_info
);
707 /* Strided store or load. */
708 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
710 if (REFERENCE_CLASS_P (lhs
))
713 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
714 stmt
, ncopies_for_cost
,
715 (i
== 0), &oprnds_info
))
717 vect_free_oprnd_info (&oprnds_info
);
724 /* FORNOW: Check that there is no gap between the loads. */
725 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
726 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
727 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
728 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
730 if (vect_print_dump_info (REPORT_SLP
))
732 fprintf (vect_dump
, "Build SLP failed: strided "
734 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
737 vect_free_oprnd_info (&oprnds_info
);
741 /* Check that the size of interleaved loads group is not
742 greater than the SLP group size. */
744 && GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
746 if (vect_print_dump_info (REPORT_SLP
))
748 fprintf (vect_dump
, "Build SLP failed: the number of "
749 "interleaved loads is greater than"
750 " the SLP group size ");
751 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
754 vect_free_oprnd_info (&oprnds_info
);
758 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
761 /* Check that there are no loads from different interleaving
762 chains in the same node. The only exception is complex
764 if (prev_first_load
!= first_load
765 && rhs_code
!= REALPART_EXPR
766 && rhs_code
!= IMAGPART_EXPR
)
768 if (vect_print_dump_info (REPORT_SLP
))
770 fprintf (vect_dump
, "Build SLP failed: different "
771 "interleaving chains in one node ");
772 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
775 vect_free_oprnd_info (&oprnds_info
);
780 prev_first_load
= first_load
;
782 if (first_load
== stmt
)
784 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
785 if (vect_supportable_dr_alignment (first_dr
, false)
786 == dr_unaligned_unsupported
)
788 if (vect_print_dump_info (REPORT_SLP
))
790 fprintf (vect_dump
, "Build SLP failed: unsupported "
792 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
795 vect_free_oprnd_info (&oprnds_info
);
799 /* Analyze costs (for the first stmt in the group). */
800 vect_model_load_cost (vinfo_for_stmt (stmt
),
801 ncopies_for_cost
, false, *node
);
804 /* Store the place of this load in the interleaving chain. In
805 case that permutation is needed we later decide if a specific
806 permutation is supported. */
807 load_place
= vect_get_place_in_interleaving_chain (stmt
,
812 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
814 /* We stop the tree when we reach a group of loads. */
815 stop_recursion
= true;
818 } /* Strided access. */
821 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
823 /* Not strided load. */
824 if (vect_print_dump_info (REPORT_SLP
))
826 fprintf (vect_dump
, "Build SLP failed: not strided load ");
827 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
830 /* FORNOW: Not strided loads are not supported. */
831 vect_free_oprnd_info (&oprnds_info
);
835 /* Not memory operation. */
836 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
837 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
838 && rhs_code
!= COND_EXPR
839 && rhs_code
!= CALL_EXPR
)
841 if (vect_print_dump_info (REPORT_SLP
))
843 fprintf (vect_dump
, "Build SLP failed: operation");
844 fprintf (vect_dump
, " unsupported ");
845 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
848 vect_free_oprnd_info (&oprnds_info
);
852 if (rhs_code
== COND_EXPR
)
854 tree cond_expr
= gimple_assign_rhs1 (stmt
);
857 first_cond_code
= TREE_CODE (cond_expr
);
858 else if (first_cond_code
!= TREE_CODE (cond_expr
))
860 if (vect_print_dump_info (REPORT_SLP
))
862 fprintf (vect_dump
, "Build SLP failed: different"
864 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
867 vect_free_oprnd_info (&oprnds_info
);
872 /* Find the def-stmts. */
873 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
874 ncopies_for_cost
, (i
== 0),
877 vect_free_oprnd_info (&oprnds_info
);
883 /* Add the costs of the node to the overall instance costs. */
884 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
885 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
887 /* Strided loads were reached - stop the recursion. */
890 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
894 *loads_permuted
= true;
896 += targetm
.vectorize
.builtin_vectorization_cost (vec_perm
, NULL
, 0)
901 /* We don't check here complex numbers chains, so we set
902 LOADS_PERMUTED for further check in
903 vect_supported_load_permutation_p. */
904 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
905 *loads_permuted
= true;
908 vect_free_oprnd_info (&oprnds_info
);
912 /* Create SLP_TREE nodes for the definition node/s. */
913 FOR_EACH_VEC_ELT (slp_oprnd_info
, oprnds_info
, i
, oprnd_info
)
917 if (oprnd_info
->first_dt
!= vect_internal_def
)
920 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
922 || !vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
, group_size
,
923 inside_cost
, outside_cost
, ncopies_for_cost
,
924 max_nunits
, load_permutation
, loads
,
925 vectorization_factor
, loads_permuted
))
928 oprnd_info
->def_stmts
= NULL
;
929 vect_free_slp_tree (child
);
930 vect_free_oprnd_info (&oprnds_info
);
934 oprnd_info
->def_stmts
= NULL
;
935 VEC_quick_push (slp_void_p
, SLP_TREE_CHILDREN (*node
), child
);
938 vect_free_oprnd_info (&oprnds_info
);
944 vect_print_slp_tree (slp_tree node
)
953 fprintf (vect_dump
, "node ");
954 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
956 fprintf (vect_dump
, "\n\tstmt %d ", i
);
957 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
959 fprintf (vect_dump
, "\n");
961 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
962 vect_print_slp_tree ((slp_tree
) child
);
966 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
967 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
968 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
969 stmts in NODE are to be marked. */
972 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
981 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
983 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
985 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
986 vect_mark_slp_stmts ((slp_tree
) child
, mark
, j
);
990 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
993 vect_mark_slp_stmts_relevant (slp_tree node
)
997 stmt_vec_info stmt_info
;
1003 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1005 stmt_info
= vinfo_for_stmt (stmt
);
1006 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1007 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1008 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1011 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1012 vect_mark_slp_stmts_relevant ((slp_tree
) child
);
1016 /* Check if the permutation required by the SLP INSTANCE is supported.
1017 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1020 vect_supported_slp_permutation_p (slp_instance instance
)
1022 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
1023 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1024 gimple first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
1025 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
1027 slp_tree
*tmp_loads
= NULL
;
1028 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
1031 /* FORNOW: The only supported loads permutation is loads from the same
1032 location in all the loads in the node, when the data-refs in
1033 nodes of LOADS constitute an interleaving chain.
1034 Sort the nodes according to the order of accesses in the chain. */
1035 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
1037 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
1038 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
1039 i
+= group_size
, j
++)
1041 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
1042 /* Check that the loads are all in the same interleaving chain. */
1043 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt
)) != first_load
)
1045 if (vect_print_dump_info (REPORT_DETAILS
))
1047 fprintf (vect_dump
, "Build SLP failed: unsupported data "
1049 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
1056 tmp_loads
[index
] = load
;
1059 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1060 for (i
= 0; i
< group_size
; i
++)
1061 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
1063 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
1064 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
1067 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
1068 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
1076 /* Rearrange the statements of NODE according to PERMUTATION. */
1079 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1080 VEC (int, heap
) *permutation
)
1083 VEC (gimple
, heap
) *tmp_stmts
;
1084 unsigned int index
, i
;
1090 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1091 vect_slp_rearrange_stmts ((slp_tree
) child
, group_size
, permutation
);
1093 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
1094 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1096 for (i
= 0; i
< group_size
; i
++)
1097 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
1099 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1101 index
= VEC_index (int, permutation
, i
);
1102 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
1105 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
1106 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1110 /* Check if the required load permutation is supported.
1111 LOAD_PERMUTATION contains a list of indices of the loads.
1112 In SLP this permutation is relative to the order of strided stores that are
1113 the base of the SLP instance. */
1116 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
1117 VEC (int, heap
) *load_permutation
)
1119 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
1120 bool supported
, bad_permutation
= false;
1122 slp_tree node
, other_complex_node
;
1123 gimple stmt
, first
= NULL
, other_node_first
, load
, next_load
, first_load
;
1124 unsigned complex_numbers
= 0;
1125 struct data_reference
*dr
;
1126 bb_vec_info bb_vinfo
;
1128 /* FORNOW: permutations are only supported in SLP. */
1132 if (vect_print_dump_info (REPORT_SLP
))
1134 fprintf (vect_dump
, "Load permutation ");
1135 FOR_EACH_VEC_ELT (int, load_permutation
, i
, next
)
1136 fprintf (vect_dump
, "%d ", next
);
1139 /* In case of reduction every load permutation is allowed, since the order
1140 of the reduction statements is not important (as opposed to the case of
1141 strided stores). The only condition we need to check is that all the
1142 load nodes are of the same size and have the same permutation (and then
1143 rearrange all the nodes of the SLP instance according to this
1146 /* Check that all the load nodes are of the same size. */
1147 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1149 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
1150 != (unsigned) group_size
)
1153 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1154 if (is_gimple_assign (stmt
)
1155 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
1156 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
1160 /* Complex operands can be swapped as following:
1161 real_c = real_b + real_a;
1162 imag_c = imag_a + imag_b;
1163 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1164 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1165 chains are mixed, they match the above pattern. */
1166 if (complex_numbers
)
1168 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1170 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, stmt
)
1176 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != first
)
1178 if (complex_numbers
!= 2)
1186 other_complex_node
= VEC_index (slp_tree
,
1187 SLP_INSTANCE_LOADS (slp_instn
), k
);
1188 other_node_first
= VEC_index (gimple
,
1189 SLP_TREE_SCALAR_STMTS (other_complex_node
), 0);
1191 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1192 != other_node_first
)
1200 /* We checked that this case is OK, so there is no need to proceed with
1201 permutation tests. */
1202 if (complex_numbers
== 2)
1204 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (slp_instn
));
1205 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1209 node
= SLP_INSTANCE_TREE (slp_instn
);
1210 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1211 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1212 instance, not all the loads belong to the same node or interleaving
1213 group. Hence, we need to divide them into groups according to
1215 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
1217 /* Reduction (there are no data-refs in the root).
1218 In reduction chain the order of the loads is important. */
1219 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1220 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1222 int first_group_load_index
;
1224 /* Compare all the permutation sequences to the first one. */
1225 for (i
= 1; i
< number_of_groups
; i
++)
1228 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
1230 next
= VEC_index (int, load_permutation
, j
);
1231 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1233 if (next
!= first_group_load_index
)
1235 bad_permutation
= true;
1242 if (bad_permutation
)
1246 if (!bad_permutation
)
1248 /* Check that the loads in the first sequence are different and there
1249 are no gaps between them. */
1250 load_index
= sbitmap_alloc (group_size
);
1251 sbitmap_zero (load_index
);
1252 for (k
= 0; k
< group_size
; k
++)
1254 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1255 if (TEST_BIT (load_index
, first_group_load_index
))
1257 bad_permutation
= true;
1261 SET_BIT (load_index
, first_group_load_index
);
1264 if (!bad_permutation
)
1265 for (k
= 0; k
< group_size
; k
++)
1266 if (!TEST_BIT (load_index
, k
))
1268 bad_permutation
= true;
1272 sbitmap_free (load_index
);
1275 if (!bad_permutation
)
1277 /* This permutation is valid for reduction. Since the order of the
1278 statements in the nodes is not important unless they are memory
1279 accesses, we can rearrange the statements in all the nodes
1280 according to the order of the loads. */
1281 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1283 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1288 /* In basic block vectorization we allow any subchain of an interleaving
1289 chain.
1290 FORNOW: not supported in loop SLP because of realignment complications. */
1291 bb_vinfo
= STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
));
1292 bad_permutation
= false;
1293 /* Check that for every node in the instance the loads form a subchain. */
1296 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1300 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1303 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
));
1305 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
)))
1307 bad_permutation
= true;
1311 if (j
!= 0 && next_load
!= load
)
1313 bad_permutation
= true;
1317 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1320 if (bad_permutation
)
1324 /* Check that the alignment of the first load in every subchain, i.e.,
1325 the first statement in every load node, is supported. */
1326 if (!bad_permutation
)
1328 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1330 first_load
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1332 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1334 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1335 if (vect_supportable_dr_alignment (dr
, false)
1336 == dr_unaligned_unsupported
)
1338 if (vect_print_dump_info (REPORT_SLP
))
1340 fprintf (vect_dump
, "unsupported unaligned load ");
1341 print_gimple_stmt (vect_dump
, first_load
, 0,
1344 bad_permutation
= true;
1350 if (!bad_permutation
)
1352 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1358 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1359 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1360 well (unless it's reduction). */
1361 if (VEC_length (int, load_permutation
)
1362 != (unsigned int) (group_size
* group_size
))
1366 load_index
= sbitmap_alloc (group_size
);
1367 sbitmap_zero (load_index
);
1368 for (j
= 0; j
< group_size
; j
++)
1370 for (i
= j
* group_size
, k
= 0;
1371 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
1374 if (i
!= j
* group_size
&& next
!= prev
)
1383 if (TEST_BIT (load_index
, prev
))
1389 SET_BIT (load_index
, prev
);
1392 for (j
= 0; j
< group_size
; j
++)
1393 if (!TEST_BIT (load_index
, j
))
1396 sbitmap_free (load_index
);
1398 if (supported
&& i
== group_size
* group_size
1399 && vect_supported_slp_permutation_p (slp_instn
))
1406 /* Find the first load in the loop that belongs to INSTANCE.
1407 When loads are in several SLP nodes, there can be a case in which the first
1408 load does not appear in the first SLP node to be transformed, causing
1409 incorrect order of statements. Since we generate all the loads together,
1410 they must be inserted before the first load of the SLP instance and not
1411 before the first load of the first node of the instance. */
1414 vect_find_first_load_in_slp_instance (slp_instance instance
)
1418 gimple first_load
= NULL
, load
;
1420 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1421 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1422 first_load
= get_earlier_stmt (load
, first_load
);
1428 /* Find the last store in SLP INSTANCE. */
1431 vect_find_last_store_in_slp_instance (slp_instance instance
)
1435 gimple last_store
= NULL
, store
;
1437 node
= SLP_INSTANCE_TREE (instance
);
1439 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, store
);
1441 last_store
= get_later_stmt (store
, last_store
);
1447 /* Analyze an SLP instance starting from a group of strided stores. Call
1448 vect_build_slp_tree to build a tree of packed stmts if possible.
1449 Return FALSE if it's impossible to SLP any stmt in the loop. */
1452 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1455 slp_instance new_instance
;
1457 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1458 unsigned int unrolling_factor
= 1, nunits
;
1459 tree vectype
, scalar_type
= NULL_TREE
;
1461 unsigned int vectorization_factor
= 0;
1462 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1463 unsigned int max_nunits
= 0;
1464 VEC (int, heap
) *load_permutation
;
1465 VEC (slp_tree
, heap
) *loads
;
1466 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1467 bool loads_permuted
= false;
1468 VEC (gimple
, heap
) *scalar_stmts
;
1470 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1474 scalar_type
= TREE_TYPE (DR_REF (dr
));
1475 vectype
= get_vectype_for_scalar_type (scalar_type
);
1479 gcc_assert (loop_vinfo
);
1480 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1483 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1487 gcc_assert (loop_vinfo
);
1488 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1489 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1494 if (vect_print_dump_info (REPORT_SLP
))
1496 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1497 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1503 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1505 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1507 vectorization_factor
= nunits
;
1509 /* Calculate the unrolling factor. */
1510 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1511 if (unrolling_factor
!= 1 && !loop_vinfo
)
1513 if (vect_print_dump_info (REPORT_SLP
))
1514 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1520 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1521 scalar_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1523 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1525 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1528 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1529 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1530 VEC_safe_push (gimple
, heap
, scalar_stmts
,
1531 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1533 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1534 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1539 /* Collect reduction statements. */
1540 VEC (gimple
, heap
) *reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1541 for (i
= 0; VEC_iterate (gimple
, reductions
, i
, next
); i
++)
1542 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1545 node
= vect_create_new_slp_node (scalar_stmts
);
1547 /* Calculate the number of vector stmts to create based on the unrolling
1548 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1549 GROUP_SIZE / NUNITS otherwise. */
1550 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1552 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1553 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1555 /* Build the tree for the SLP instance. */
1556 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1557 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1558 &max_nunits
, &load_permutation
, &loads
,
1559 vectorization_factor
, &loads_permuted
))
1561 /* Calculate the unrolling factor based on the smallest type. */
1562 if (max_nunits
> nunits
)
1563 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1566 if (unrolling_factor
!= 1 && !loop_vinfo
)
1568 if (vect_print_dump_info (REPORT_SLP
))
1569 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1574 /* Create a new SLP instance. */
1575 new_instance
= XNEW (struct _slp_instance
);
1576 SLP_INSTANCE_TREE (new_instance
) = node
;
1577 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1578 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1579 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1580 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1581 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1582 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1583 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1587 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1590 if (vect_print_dump_info (REPORT_SLP
))
1592 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1594 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1597 vect_free_slp_instance (new_instance
);
1601 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1602 = vect_find_first_load_in_slp_instance (new_instance
);
1605 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1608 VEC_safe_push (slp_instance
, heap
,
1609 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1612 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1615 if (vect_print_dump_info (REPORT_SLP
))
1616 vect_print_slp_tree (node
);
1621 /* Failed to SLP. */
1622 /* Free the allocated memory. */
1623 vect_free_slp_tree (node
);
1624 VEC_free (int, heap
, load_permutation
);
1625 VEC_free (slp_tree
, heap
, loads
);
1631 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1632 trees of packed scalar stmts if SLP is possible. */
1635 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1638 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
, *reduc_chains
= NULL
;
1639 gimple first_element
;
1642 if (vect_print_dump_info (REPORT_SLP
))
1643 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1647 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1648 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1649 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1652 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1654 /* Find SLP sequences starting from groups of strided stores. */
1655 FOR_EACH_VEC_ELT (gimple
, strided_stores
, i
, first_element
)
1656 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1659 if (bb_vinfo
&& !ok
)
1661 if (vect_print_dump_info (REPORT_SLP
))
1662 fprintf (vect_dump
, "Failed to SLP the basic block.");
1668 && VEC_length (gimple
, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
)) > 0)
1670 /* Find SLP sequences starting from reduction chains. */
1671 FOR_EACH_VEC_ELT (gimple
, reduc_chains
, i
, first_element
)
1672 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1677 /* Don't try to vectorize SLP reductions if reduction chain was
1682 /* Find SLP sequences starting from groups of reductions. */
1683 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1684 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1685 VEC_index (gimple
, reductions
, 0)))
1692 /* For each possible SLP instance decide whether to SLP it and calculate overall
1693 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1694 least one instance. */
1697 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1699 unsigned int i
, unrolling_factor
= 1;
1700 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1701 slp_instance instance
;
1702 int decided_to_slp
= 0;
1704 if (vect_print_dump_info (REPORT_SLP
))
1705 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1707 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1709 /* FORNOW: SLP if you can. */
1710 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1711 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1713 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1714 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1715 loop-based vectorization. Such stmts will be marked as HYBRID. */
1716 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1720 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1722 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1723 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1724 decided_to_slp
, unrolling_factor
);
1726 return (decided_to_slp
> 0);
1730 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1731 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1734 vect_detect_hybrid_slp_stmts (slp_tree node
)
1737 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (node
);
1738 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1739 imm_use_iterator imm_iter
;
1741 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1743 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1744 struct loop
*loop
= NULL
;
1745 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1746 basic_block bb
= NULL
;
1752 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1754 bb
= BB_VINFO_BB (bb_vinfo
);
1756 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1757 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1758 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1759 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1760 if (gimple_bb (use_stmt
)
1761 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1762 || bb
== gimple_bb (use_stmt
))
1763 && (stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1764 && !STMT_SLP_TYPE (stmt_vinfo
)
1765 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1766 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1767 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1768 && STMT_VINFO_DEF_TYPE (stmt_vinfo
)
1769 == vect_reduction_def
))
1770 vect_mark_slp_stmts (node
, hybrid
, i
);
1772 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1773 vect_detect_hybrid_slp_stmts ((slp_tree
) child
);
1777 /* Find stmts that must be both vectorized and SLPed. */
1780 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1783 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1784 slp_instance instance
;
1786 if (vect_print_dump_info (REPORT_SLP
))
1787 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1789 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1790 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1794 /* Create and initialize a new bb_vec_info struct for BB, as well as
1795 stmt_vec_info structs for all the stmts in it. */
1798 new_bb_vec_info (basic_block bb
)
1800 bb_vec_info res
= NULL
;
1801 gimple_stmt_iterator gsi
;
1803 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1804 BB_VINFO_BB (res
) = bb
;
1806 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1808 gimple stmt
= gsi_stmt (gsi
);
1809 gimple_set_uid (stmt
, 0);
1810 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1813 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1814 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1821 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1822 stmts in the basic block. */
1825 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1828 gimple_stmt_iterator si
;
1833 bb
= BB_VINFO_BB (bb_vinfo
);
1835 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1837 gimple stmt
= gsi_stmt (si
);
1838 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1841 /* Free stmt_vec_info. */
1842 free_stmt_vec_info (stmt
);
1845 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo
));
1846 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1847 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1848 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1854 /* Analyze statements contained in SLP tree node after recursively analyzing
1855 the subtree. Return TRUE if the operations are supported. */
1858 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1868 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1869 if (!vect_slp_analyze_node_operations (bb_vinfo
, (slp_tree
) child
))
1872 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1874 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1875 gcc_assert (stmt_info
);
1876 gcc_assert (PURE_SLP_STMT (stmt_info
));
1878 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1886 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1887 operations are supported. */
1890 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1892 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1893 slp_instance instance
;
1896 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1898 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1899 SLP_INSTANCE_TREE (instance
)))
1901 vect_free_slp_instance (instance
);
1902 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1908 if (!VEC_length (slp_instance
, slp_instances
))
1914 /* Check if vectorization of the basic block is profitable. */
1917 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
1919 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1920 slp_instance instance
;
1922 unsigned int vec_outside_cost
= 0, vec_inside_cost
= 0, scalar_cost
= 0;
1923 unsigned int stmt_cost
;
1925 gimple_stmt_iterator si
;
1926 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
1927 stmt_vec_info stmt_info
= NULL
;
1928 tree dummy_type
= NULL
;
1931 /* Calculate vector costs. */
1932 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1934 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
1935 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
1938 /* Calculate scalar cost. */
1939 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1941 stmt
= gsi_stmt (si
);
1942 stmt_info
= vinfo_for_stmt (stmt
);
1944 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
)
1945 || !PURE_SLP_STMT (stmt_info
))
1948 if (STMT_VINFO_DATA_REF (stmt_info
))
1950 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1951 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1952 (scalar_load
, dummy_type
, dummy
);
1954 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1955 (scalar_store
, dummy_type
, dummy
);
1958 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1959 (scalar_stmt
, dummy_type
, dummy
);
1961 scalar_cost
+= stmt_cost
;
1964 if (vect_print_dump_info (REPORT_COST
))
1966 fprintf (vect_dump
, "Cost model analysis: \n");
1967 fprintf (vect_dump
, " Vector inside of basic block cost: %d\n",
1969 fprintf (vect_dump
, " Vector outside of basic block cost: %d\n",
1971 fprintf (vect_dump
, " Scalar cost of basic block: %d", scalar_cost
);
1974 /* Vectorization is profitable if its cost is less than the cost of scalar
1976 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
1982 /* Check if the basic block can be vectorized. */
1985 vect_slp_analyze_bb_1 (basic_block bb
)
1987 bb_vec_info bb_vinfo
;
1988 VEC (ddr_p
, heap
) *ddrs
;
1989 VEC (slp_instance
, heap
) *slp_instances
;
1990 slp_instance instance
;
1993 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1995 bb_vinfo
= new_bb_vec_info (bb
);
1999 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
2001 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2002 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
2005 destroy_bb_vec_info (bb_vinfo
);
2009 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
2010 if (!VEC_length (ddr_p
, ddrs
))
2012 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2013 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
2016 destroy_bb_vec_info (bb_vinfo
);
2020 vect_pattern_recog (NULL
, bb_vinfo
);
2022 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
)
2025 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2026 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
2027 "in basic block.\n");
2029 destroy_bb_vec_info (bb_vinfo
);
2033 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
2035 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2036 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
2039 destroy_bb_vec_info (bb_vinfo
);
2043 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
2045 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2046 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
2049 destroy_bb_vec_info (bb_vinfo
);
2053 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
2055 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2056 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
2059 destroy_bb_vec_info (bb_vinfo
);
2063 /* Check the SLP opportunities in the basic block, analyze and build SLP
2065 if (!vect_analyze_slp (NULL
, bb_vinfo
))
2067 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2068 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
2069 "in basic block.\n");
2071 destroy_bb_vec_info (bb_vinfo
);
2075 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2077 /* Mark all the statements that we want to vectorize as pure SLP and
2079 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2081 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2082 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2085 if (!vect_slp_analyze_operations (bb_vinfo
))
2087 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2088 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
2090 destroy_bb_vec_info (bb_vinfo
);
2094 /* Cost model: check if the vectorization is worthwhile. */
2095 if (flag_vect_cost_model
2096 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2098 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2099 fprintf (vect_dump
, "not vectorized: vectorization is not "
2102 destroy_bb_vec_info (bb_vinfo
);
2106 if (vect_print_dump_info (REPORT_DETAILS
))
2107 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
2114 vect_slp_analyze_bb (basic_block bb
)
2116 bb_vec_info bb_vinfo
;
2118 gimple_stmt_iterator gsi
;
2119 unsigned int vector_sizes
;
2121 if (vect_print_dump_info (REPORT_DETAILS
))
2122 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
2124 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2126 gimple stmt
= gsi_stmt (gsi
);
2127 if (!is_gimple_debug (stmt
)
2128 && !gimple_nop_p (stmt
)
2129 && gimple_code (stmt
) != GIMPLE_LABEL
)
2133 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2135 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2136 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
2142 /* Autodetect first vector size we try. */
2143 current_vector_size
= 0;
2144 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2148 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2152 destroy_bb_vec_info (bb_vinfo
);
2154 vector_sizes
&= ~current_vector_size
;
2155 if (vector_sizes
== 0
2156 || current_vector_size
== 0)
2159 /* Try the next biggest vector size. */
2160 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2161 if (vect_print_dump_info (REPORT_DETAILS
))
2162 fprintf (vect_dump
, "***** Re-trying analysis with "
2163 "vector size %d\n", current_vector_size
);
2168 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2169 the number of created vector stmts depends on the unrolling factor).
2170 However, the actual number of vector stmts for every SLP node depends on
2171 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2172 should be updated. In this function we assume that the inside costs
2173 calculated in vect_model_xxx_cost are linear in ncopies. */
2176 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2178 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2179 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2180 slp_instance instance
;
2182 if (vect_print_dump_info (REPORT_SLP
))
2183 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
2185 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2186 /* We assume that costs are linear in ncopies. */
2187 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
2188 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2192 /* For constant and loop invariant defs of SLP_NODE this function returns
2193 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2194 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2195 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2196 REDUC_INDEX is the index of the reduction operand in the statements, unless
2200 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2201 VEC (tree
, heap
) **vec_oprnds
,
2202 unsigned int op_num
, unsigned int number_of_vectors
,
2205 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2206 gimple stmt
= VEC_index (gimple
, stmts
, 0);
2207 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2211 unsigned j
, number_of_places_left_in_vector
;
2214 int group_size
= VEC_length (gimple
, stmts
);
2215 unsigned int vec_num
, i
;
2216 unsigned number_of_copies
= 1;
2217 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
2218 bool constant_p
, is_store
;
2219 tree neutral_op
= NULL
;
2220 enum tree_code code
= gimple_expr_code (stmt
);
2224 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2225 && reduc_index
!= -1)
2227 op_num
= reduc_index
- 1;
2228 op
= gimple_op (stmt
, reduc_index
);
2229 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2230 we need either neutral operands or the original operands. See
2231 get_initial_def_for_reduction() for details. */
2234 case WIDEN_SUM_EXPR
:
2240 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2241 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2243 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2248 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2249 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2251 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2256 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2261 def_stmt
= SSA_NAME_DEF_STMT (op
);
2262 loop
= (gimple_bb (stmt
))->loop_father
;
2263 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2264 loop_preheader_edge (loop
));
2272 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2275 op
= gimple_assign_rhs1 (stmt
);
2282 if (CONSTANT_CLASS_P (op
))
2287 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2288 gcc_assert (vector_type
);
2289 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2291 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2292 created vectors. It is greater than 1 if unrolling is performed.
2294 For example, we have two scalar operands, s1 and s2 (e.g., group of
2295 strided accesses of size two), while NUNITS is four (i.e., four scalars
2296 of this type can be packed in a vector). The output vector will contain
2297 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2300 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2301 containing the operands.
2303 For example, NUNITS is four as before, and the group size is 8
2304 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2305 {s5, s6, s7, s8}. */
2307 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2309 number_of_places_left_in_vector
= nunits
;
2310 elts
= XALLOCAVEC (tree
, nunits
);
2311 for (j
= 0; j
< number_of_copies
; j
++)
2313 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
2316 op
= gimple_assign_rhs1 (stmt
);
2322 if (op_num
== 0 || op_num
== 1)
2324 tree cond
= gimple_assign_rhs1 (stmt
);
2325 op
= TREE_OPERAND (cond
, op_num
);
2330 op
= gimple_assign_rhs2 (stmt
);
2332 op
= gimple_assign_rhs3 (stmt
);
2337 op
= gimple_call_arg (stmt
, op_num
);
2341 op
= gimple_op (stmt
, op_num
+ 1);
2345 if (reduc_index
!= -1)
2347 loop
= (gimple_bb (stmt
))->loop_father
;
2348 def_stmt
= SSA_NAME_DEF_STMT (op
);
2352 /* Get the def before the loop. In reduction chain we have only
2353 one initial value. */
2354 if ((j
!= (number_of_copies
- 1)
2355 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2360 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2361 loop_preheader_edge (loop
));
2364 /* Create 'vect_ = {op0,op1,...,opn}'. */
2365 number_of_places_left_in_vector
--;
2367 && !types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
2369 op
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
), op
);
2370 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
2372 elts
[number_of_places_left_in_vector
] = op
;
2374 if (number_of_places_left_in_vector
== 0)
2376 number_of_places_left_in_vector
= nunits
;
2379 vec_cst
= build_vector (vector_type
, elts
);
2382 VEC(constructor_elt
,gc
) *v
;
2384 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
2385 for (k
= 0; k
< nunits
; ++k
)
2386 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
2387 vec_cst
= build_constructor (vector_type
, v
);
2389 VEC_quick_push (tree
, voprnds
,
2390 vect_init_vector (stmt
, vec_cst
,
2391 vector_type
, NULL
));
2396 /* Since the vectors are created in the reverse order, we should invert
2398 vec_num
= VEC_length (tree
, voprnds
);
2399 for (j
= vec_num
; j
!= 0; j
--)
2401 vop
= VEC_index (tree
, voprnds
, j
- 1);
2402 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2405 VEC_free (tree
, heap
, voprnds
);
2407 /* In case that VF is greater than the unrolling factor needed for the SLP
2408 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2409 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2410 to replicate the vectors. */
2411 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
2413 tree neutral_vec
= NULL
;
2418 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2420 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
2424 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
2425 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2431 /* Get vectorized definitions from SLP_NODE that contains corresponding
2432 vectorized def-stmts. */
2435 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
2438 gimple vec_def_stmt
;
2441 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
2443 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2445 gcc_assert (vec_def_stmt
);
2446 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2447 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2452 /* Get vectorized definitions for SLP_NODE.
2453 If the scalar definitions are loop invariants or constants, collect them and
2454 call vect_get_constant_vectors() to create vector stmts.
2455 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2456 must be stored in the corresponding child of SLP_NODE, and we call
2457 vect_get_slp_vect_defs () to retrieve them. */
2460 vect_get_slp_defs (VEC (tree
, heap
) *ops
, slp_tree slp_node
,
2461 VEC (slp_void_p
, heap
) **vec_oprnds
, int reduc_index
)
2463 gimple first_stmt
, first_def
;
2464 int number_of_vects
= 0, i
;
2465 unsigned int child_index
= 0;
2466 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2467 slp_tree child
= NULL
;
2468 VEC (tree
, heap
) *vec_defs
;
2469 tree oprnd
, def_lhs
;
2470 bool vectorized_defs
;
2472 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
2473 FOR_EACH_VEC_ELT (tree
, ops
, i
, oprnd
)
2475 /* For each operand we check if it has vectorized definitions in a child
2476 node or we need to create them (for invariants and constants). We
2477 check if the LHS of the first stmt of the next child matches OPRND.
2478 If it does, we found the correct child. Otherwise, we call
2479 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2480 to check this child node for the next operand. */
2481 vectorized_defs
= false;
2482 if (VEC_length (slp_void_p
, SLP_TREE_CHILDREN (slp_node
)) > child_index
)
2484 child
= (slp_tree
) VEC_index (slp_void_p
,
2485 SLP_TREE_CHILDREN (slp_node
),
2487 first_def
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (child
), 0);
2489 /* In the end of a pattern sequence we have a use of the original stmt,
2490 so we need to compare OPRND with the original def. */
2491 if (is_pattern_stmt_p (vinfo_for_stmt (first_def
))
2492 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt
))
2493 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt
)))
2494 first_def
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2496 if (is_gimple_call (first_def
))
2497 def_lhs
= gimple_call_lhs (first_def
);
2499 def_lhs
= gimple_assign_lhs (first_def
);
2501 if (operand_equal_p (oprnd
, def_lhs
, 0))
2503 /* The number of vector defs is determined by the number of
2504 vector statements in the node from which we get those
2506 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2507 vectorized_defs
= true;
2512 if (!vectorized_defs
)
2516 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2517 /* Number of vector stmts was calculated according to LHS in
2518 vect_schedule_slp_instance (), fix it by replacing LHS with
2519 RHS, if necessary. See vect_get_smallest_scalar_type () for
2521 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2523 if (rhs_size_unit
!= lhs_size_unit
)
2525 number_of_vects
*= rhs_size_unit
;
2526 number_of_vects
/= lhs_size_unit
;
2531 /* Allocate memory for vectorized defs. */
2532 vec_defs
= VEC_alloc (tree
, heap
, number_of_vects
);
2534 /* For reduction defs we call vect_get_constant_vectors (), since we are
2535 looking for initial loop invariant values. */
2536 if (vectorized_defs
&& reduc_index
== -1)
2537 /* The defs are already vectorized. */
2538 vect_get_slp_vect_defs (child
, &vec_defs
);
2540 /* Build vectors from scalar defs. */
2541 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2542 number_of_vects
, reduc_index
);
2544 VEC_quick_push (slp_void_p
, *vec_oprnds
, (slp_void_p
) vec_defs
);
2546 /* For reductions, we only need initial values. */
2547 if (reduc_index
!= -1)
2553 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2554 building a vector of type MASK_TYPE from it) and two input vectors placed in
2555 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2556 shifting by STRIDE elements of DR_CHAIN for every copy.
2557 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2559 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2560 the created stmts must be inserted. */
2563 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2564 tree mask
, int first_vec_indx
, int second_vec_indx
,
2565 gimple_stmt_iterator
*gsi
, slp_tree node
,
2566 tree vectype
, VEC(tree
,heap
) *dr_chain
,
2567 int ncopies
, int vect_stmts_counter
)
2570 gimple perm_stmt
= NULL
;
2571 stmt_vec_info next_stmt_info
;
2573 tree first_vec
, second_vec
, data_ref
;
2575 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2577 /* Initialize the vect stmts of NODE to properly insert the generated
2579 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
2580 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2581 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
2583 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2584 for (i
= 0; i
< ncopies
; i
++)
2586 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
2587 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
2589 /* Generate the permute statement. */
2590 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, perm_dest
,
2591 first_vec
, second_vec
, mask
);
2592 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2593 gimple_set_lhs (perm_stmt
, data_ref
);
2594 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2596 /* Store the vector statement in NODE. */
2597 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
2598 stride
* i
+ vect_stmts_counter
, perm_stmt
);
2600 first_vec_indx
+= stride
;
2601 second_vec_indx
+= stride
;
2604 /* Mark the scalar stmt as vectorized. */
2605 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2606 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2610 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2611 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2612 representation. Check that the mask is valid and return FALSE if not.
2613 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2614 the next vector, i.e., the current first vector is not needed. */
2617 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2618 int mask_nunits
, bool only_one_vec
, int index
,
2619 unsigned char *mask
, int *current_mask_element
,
2620 bool *need_next_vector
, int *number_of_mask_fixes
,
2621 bool *mask_fixed
, bool *needs_first_vector
)
2625 /* Convert to target specific representation. */
2626 *current_mask_element
= first_mask_element
+ m
;
2627 /* Adjust the value in case it's a mask for second and third vectors. */
2628 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2630 if (*current_mask_element
< mask_nunits
)
2631 *needs_first_vector
= true;
2633 /* We have only one input vector to permute but the mask accesses values in
2634 the next vector as well. */
2635 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2637 if (vect_print_dump_info (REPORT_DETAILS
))
2639 fprintf (vect_dump
, "permutation requires at least two vectors ");
2640 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2646 /* The mask requires the next vector. */
2647 if (*current_mask_element
>= mask_nunits
* 2)
2649 if (*needs_first_vector
|| *mask_fixed
)
2651 /* We either need the first vector too or have already moved to the
2652 next vector. In both cases, this permutation needs three
2654 if (vect_print_dump_info (REPORT_DETAILS
))
2656 fprintf (vect_dump
, "permutation requires at "
2657 "least three vectors ");
2658 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2664 /* We move to the next vector, dropping the first one and working with
2665 the second and the third - we need to adjust the values of the mask
2667 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2669 for (i
= 0; i
< index
; i
++)
2670 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2672 (*number_of_mask_fixes
)++;
2676 *need_next_vector
= *mask_fixed
;
2678 /* This was the last element of this mask. Start a new one. */
2679 if (index
== mask_nunits
- 1)
2681 *number_of_mask_fixes
= 1;
2682 *mask_fixed
= false;
2683 *needs_first_vector
= false;
2690 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2691 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2692 permute statements for SLP_NODE_INSTANCE. */
2694 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
2695 gimple_stmt_iterator
*gsi
, int vf
,
2696 slp_instance slp_node_instance
, bool analyze_only
)
2698 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2699 tree mask_element_type
= NULL_TREE
, mask_type
;
2700 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2702 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2703 gimple next_scalar_stmt
;
2704 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2705 int first_mask_element
;
2706 int index
, unroll_factor
, current_mask_element
, ncopies
;
2707 unsigned char *mask
;
2708 bool only_one_vec
= false, need_next_vector
= false;
2709 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2710 int number_of_mask_fixes
= 1;
2711 bool mask_fixed
= false;
2712 bool needs_first_vector
= false;
2713 enum machine_mode mode
;
2715 mode
= TYPE_MODE (vectype
);
2717 if (!can_vec_perm_p (mode
, false, NULL
))
2719 if (vect_print_dump_info (REPORT_DETAILS
))
2721 fprintf (vect_dump
, "no vect permute for ");
2722 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2727 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2728 same size as the vector element being permuted. */
2729 mask_element_type
= lang_hooks
.types
.type_for_mode
2730 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
2731 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2732 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2733 mask
= XALLOCAVEC (unsigned char, nunits
);
2734 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2736 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2737 unrolling factor. */
2738 orig_vec_stmts_num
= group_size
*
2739 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2740 if (orig_vec_stmts_num
== 1)
2741 only_one_vec
= true;
2743 /* Number of copies is determined by the final vectorization factor
2744 relatively to SLP_NODE_INSTANCE unrolling factor. */
2745 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2747 /* Generate permutation masks for every NODE. Number of masks for each NODE
2748 is equal to GROUP_SIZE.
2749 E.g., we have a group of three nodes with three loads from the same
2750 location in each node, and the vector size is 4. I.e., we have a
2751 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2752 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2753 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2756 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2757 The last mask is illegal since we assume two operands for permute
2758 operation, and the mask element values can't be outside that range.
2759 Hence, the last mask must be converted into {2,5,5,5}.
2760 For the first two permutations we need the first and the second input
2761 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2762 we need the second and the third vectors: {b1,c1,a2,b2} and
2765 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
), i
, node
)
2769 vect_stmts_counter
= 0;
2771 first_vec_index
= vec_index
++;
2773 second_vec_index
= first_vec_index
;
2775 second_vec_index
= vec_index
++;
2777 for (j
= 0; j
< unroll_factor
; j
++)
2779 for (k
= 0; k
< group_size
; k
++)
2781 first_mask_element
= i
+ j
* group_size
;
2782 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2783 nunits
, only_one_vec
, index
,
2784 mask
, ¤t_mask_element
,
2786 &number_of_mask_fixes
, &mask_fixed
,
2787 &needs_first_vector
))
2789 mask
[index
++] = current_mask_element
;
2791 if (index
== nunits
)
2793 tree mask_vec
, *mask_elts
;
2796 if (!can_vec_perm_p (mode
, false, mask
))
2798 if (vect_print_dump_info (REPORT_DETAILS
))
2800 fprintf (vect_dump
, "unsupported vect permute { ");
2801 for (i
= 0; i
< nunits
; ++i
)
2802 fprintf (vect_dump
, "%d ", mask
[i
]);
2803 fprintf (vect_dump
, "}\n");
2808 mask_elts
= XALLOCAVEC (tree
, nunits
);
2809 for (l
= 0; l
< nunits
; ++l
)
2810 mask_elts
[l
] = build_int_cst (mask_element_type
, mask
[l
]);
2811 mask_vec
= build_vector (mask_type
, mask_elts
);
2816 if (need_next_vector
)
2818 first_vec_index
= second_vec_index
;
2819 second_vec_index
= vec_index
;
2822 next_scalar_stmt
= VEC_index (gimple
,
2823 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
2825 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2826 mask_vec
, first_vec_index
, second_vec_index
,
2827 gsi
, node
, vectype
, dr_chain
,
2828 ncopies
, vect_stmts_counter
++);
2840 /* Vectorize SLP instance tree in postorder. */
2843 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2844 unsigned int vectorization_factor
)
2847 bool strided_store
, is_store
;
2848 gimple_stmt_iterator si
;
2849 stmt_vec_info stmt_info
;
2850 unsigned int vec_stmts_size
, nunits
, group_size
;
2853 slp_tree loads_node
;
2859 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2860 vect_schedule_slp_instance ((slp_tree
) child
, instance
,
2861 vectorization_factor
);
2863 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
2864 stmt_info
= vinfo_for_stmt (stmt
);
2866 /* VECTYPE is the type of the destination. */
2867 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2868 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
2869 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2871 /* For each SLP instance calculate number of vector stmts to be created
2872 for the scalar stmts in each node of the SLP tree. Number of vector
2873 elements in one vector iteration is the number of scalar elements in
2874 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2876 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
2878 /* In case of load permutation we have to allocate vectorized statements for
2879 all the nodes that participate in that permutation. */
2880 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2882 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
)
2884 if (!SLP_TREE_VEC_STMTS (loads_node
))
2886 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
2888 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
2893 if (!SLP_TREE_VEC_STMTS (node
))
2895 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
2896 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
2899 if (vect_print_dump_info (REPORT_DETAILS
))
2901 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
2902 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2905 /* Loads should be inserted before the first load. */
2906 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
2907 && STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2908 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
2909 && SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2910 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
2911 else if (is_pattern_stmt_p (stmt_info
))
2912 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2914 si
= gsi_for_stmt (stmt
);
2916 /* Stores should be inserted just before the last store. */
2917 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2918 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
2920 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
2921 if (is_pattern_stmt_p (vinfo_for_stmt (last_store
)))
2922 last_store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store
));
2923 si
= gsi_for_stmt (last_store
);
2926 /* Mark the first element of the reduction chain as reduction to properly
2927 transform the node. In the analysis phase only the last element of the
2928 chain is marked as reduction. */
2929 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2930 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
2932 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
2933 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
2936 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
2940 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2941 For loop vectorization this is done in vectorizable_call, but for SLP
2942 it needs to be deferred until end of vect_schedule_slp, because multiple
2943 SLP instances may refer to the same scalar stmt. */
2946 vect_remove_slp_scalar_calls (slp_tree node
)
2948 gimple stmt
, new_stmt
;
2949 gimple_stmt_iterator gsi
;
2953 stmt_vec_info stmt_info
;
2958 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2959 vect_remove_slp_scalar_calls ((slp_tree
) child
);
2961 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2963 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
2965 stmt_info
= vinfo_for_stmt (stmt
);
2966 if (stmt_info
== NULL
2967 || is_pattern_stmt_p (stmt_info
)
2968 || !PURE_SLP_STMT (stmt_info
))
2970 lhs
= gimple_call_lhs (stmt
);
2971 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2972 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2973 set_vinfo_for_stmt (stmt
, NULL
);
2974 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2975 gsi
= gsi_for_stmt (stmt
);
2976 gsi_replace (&gsi
, new_stmt
, false);
2977 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2981 /* Generate vector code for all SLP instances in the loop/basic block. */
2984 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2986 VEC (slp_instance
, heap
) *slp_instances
;
2987 slp_instance instance
;
2989 bool is_store
= false;
2993 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2994 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2998 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
3002 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
3004 /* Schedule the tree of INSTANCE. */
3005 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3007 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
)
3008 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3009 fprintf (vect_dump
, "vectorizing stmts using SLP.");
3012 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
3014 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3017 gimple_stmt_iterator gsi
;
3019 vect_remove_slp_scalar_calls (root
);
3021 for (j
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (root
), j
, store
)
3022 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3024 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3027 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3028 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3029 /* Free the attached stmt_vec_info and remove the stmt. */
3030 gsi
= gsi_for_stmt (store
);
3031 gsi_remove (&gsi
, true);
3032 free_stmt_vec_info (store
);
3040 /* Vectorize the basic block. */
3043 vect_slp_transform_bb (basic_block bb
)
3045 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3046 gimple_stmt_iterator si
;
3048 gcc_assert (bb_vinfo
);
3050 if (vect_print_dump_info (REPORT_DETAILS
))
3051 fprintf (vect_dump
, "SLPing BB\n");
3053 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3055 gimple stmt
= gsi_stmt (si
);
3056 stmt_vec_info stmt_info
;
3058 if (vect_print_dump_info (REPORT_DETAILS
))
3060 fprintf (vect_dump
, "------>SLPing statement: ");
3061 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3064 stmt_info
= vinfo_for_stmt (stmt
);
3065 gcc_assert (stmt_info
);
3067 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3068 if (STMT_SLP_TYPE (stmt_info
))
3070 vect_schedule_slp (NULL
, bb_vinfo
);
3075 mark_sym_for_renaming (gimple_vop (cfun
));
3076 /* The memory tags and pointers in vectorized statements need to
3077 have their SSA forms updated. FIXME, why can't this be delayed
3078 until all the loops have been transformed? */
3079 update_ssa (TODO_update_ssa
);
3081 if (vect_print_dump_info (REPORT_DETAILS
))
3082 fprintf (vect_dump
, "BASIC BLOCK VECTORIZED\n");
3084 destroy_bb_vec_info (bb_vinfo
);