1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
43 /* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
/* Walks the statements of BB and returns the location of the first
   statement whose location is known (not UNKNOWN_LOC).
   NOTE(review): this extraction is incomplete -- the return type,
   the `stmt' extraction from SI and the fallback UNKNOWN_LOC return
   are missing; compare with the full source before editing.  */
47 find_bb_location (basic_block bb
)
50 gimple_stmt_iterator si
;
55 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
58 if (gimple_location (stmt
) != UNKNOWN_LOC
)
59 return gimple_location (stmt
);
66 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
/* Recursively frees the SLP tree rooted at NODE: first each child via
   a recursive call, then the children vector, the scalar-stmts vector,
   and -- when present -- the vector-stmts vector.
   NOTE(review): incomplete extraction -- iterator declarations, braces
   and the final XDELETE of NODE itself appear to be missing.  */
69 vect_free_slp_tree (slp_tree node
)
77 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
78 vect_free_slp_tree ((slp_tree
) child
);
80 VEC_free (slp_void_p
, heap
, SLP_TREE_CHILDREN (node
));
81 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
83 if (SLP_TREE_VEC_STMTS (node
))
84 VEC_free (gimple
, heap
, SLP_TREE_VEC_STMTS (node
));
90 /* Free the memory allocated for the SLP instance. */
/* Releases everything owned by INSTANCE: the SLP tree (recursively),
   the load-permutation index vector, and the loads vector.
   NOTE(review): incomplete extraction -- the XDELETE of INSTANCE
   itself and surrounding braces are missing here.  */
93 vect_free_slp_instance (slp_instance instance
)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
96 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (instance
));
97 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
101 /* Create an SLP node for SCALAR_STMTS. */
/* Allocates a fresh SLP node for SCALAR_STMTS.  The operand count NOPS
   is derived from the first statement: number of call arguments for a
   call, gimple_num_ops - 1 for an assignment (with what appears to be
   an extra adjustment for COND_EXPR -- the adjusted value is missing
   from this extraction).  Takes ownership of SCALAR_STMTS; children
   vector is pre-allocated for NOPS entries; costs start at zero.
   NOTE(review): the `nops' declaration, the node return statement and
   several braces are missing from this extraction.  */
104 vect_create_new_slp_node (VEC (gimple
, heap
) *scalar_stmts
)
107 gimple stmt
= VEC_index (gimple
, scalar_stmts
, 0);
110 if (is_gimple_call (stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (is_gimple_assign (stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
121 node
= XNEW (struct _slp_tree
);
122 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
123 SLP_TREE_VEC_STMTS (node
) = NULL
;
124 SLP_TREE_CHILDREN (node
) = VEC_alloc (slp_void_p
, heap
, nops
);
125 SLP_TREE_OUTSIDE_OF_LOOP_COST (node
) = 0;
126 SLP_TREE_INSIDE_OF_LOOP_COST (node
) = 0;
132 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
/* Builds and returns a heap vector of NOPS slp_oprnd_info records, one
   per operand.  Each record gets a def-stmts vector pre-sized for
   GROUP_SIZE entries and neutral initial values (uninitialized def
   type, NULL_TREE types, first_pattern = false).
   NOTE(review): incomplete extraction -- the loop counter declaration,
   braces and the return of OPRNDS_INFO are missing.  */
134 static VEC (slp_oprnd_info
, heap
) *
135 vect_create_oprnd_info (int nops
, int group_size
)
138 slp_oprnd_info oprnd_info
;
139 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
141 oprnds_info
= VEC_alloc (slp_oprnd_info
, heap
, nops
);
142 for (i
= 0; i
< nops
; i
++)
144 oprnd_info
= XNEW (struct _slp_oprnd_info
);
145 oprnd_info
->def_stmts
= VEC_alloc (gimple
, heap
, group_size
);
146 oprnd_info
->first_dt
= vect_uninitialized_def
;
147 oprnd_info
->first_def_type
= NULL_TREE
;
148 oprnd_info
->first_const_oprnd
= NULL_TREE
;
149 oprnd_info
->first_pattern
= false;
150 VEC_quick_push (slp_oprnd_info
, oprnds_info
, oprnd_info
);
157 /* Free operands info. */
/* Frees every slp_oprnd_info record in *OPRNDS_INFO (each record's
   def-stmts vector, then the record) and finally the vector itself.
   NOTE(review): incomplete extraction -- the iterator declaration,
   braces and the assignment clearing *OPRNDS_INFO are missing.  */
160 vect_free_oprnd_info (VEC (slp_oprnd_info
, heap
) **oprnds_info
)
163 slp_oprnd_info oprnd_info
;
165 FOR_EACH_VEC_ELT (slp_oprnd_info
, *oprnds_info
, i
, oprnd_info
)
167 VEC_free (gimple
, heap
, oprnd_info
->def_stmts
);
168 XDELETE (oprnd_info
);
171 VEC_free (slp_oprnd_info
, heap
, *oprnds_info
);
175 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
176 they are of a valid type and that they match the defs of the first stmt of
177 the SLP group (stored in OPRNDS_INFO). */
/* For each operand of STMT: resolves its def statement/type via
   vect_is_simple_use, follows pattern statements inside LOOP
   (STMT_VINFO_RELATED_STMT), records the first statement's def
   info into *OPRNDS_INFO, checks later statements against it
   (optionally swapping commutative binary operands to make types
   match), accumulates costs for the first group statement, and
   pushes each def statement onto the matching operand's def-stmts
   vector.  Emits REPORT_SLP/REPORT_DETAILS dumps on failure.
   NOTE(review): large portions of this function are missing from
   this extraction (braces, returns, several conditions); do not
   edit without the complete source at hand.  */
180 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
181 slp_tree slp_node
, gimple stmt
,
182 int ncopies_for_cost
, bool first
,
183 VEC (slp_oprnd_info
, heap
) **oprnds_info
)
186 unsigned int i
, number_of_oprnds
;
187 tree def
, def_op0
= NULL_TREE
;
189 enum vect_def_type dt
= vect_uninitialized_def
;
190 enum vect_def_type dt_op0
= vect_uninitialized_def
;
191 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
192 tree lhs
= gimple_get_lhs (stmt
);
193 struct loop
*loop
= NULL
;
194 enum tree_code rhs_code
;
195 bool different_types
= false;
196 bool pattern
= false;
197 slp_oprnd_info oprnd_info
, oprnd0_info
, oprnd1_info
;
199 tree compare_rhs
= NULL_TREE
;
202 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
204 if (is_gimple_call (stmt
))
206 number_of_oprnds
= gimple_call_num_args (stmt
);
209 else if (is_gimple_assign (stmt
))
211 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
212 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
218 for (i
= 0; i
< number_of_oprnds
; i
++)
223 compare_rhs
= NULL_TREE
;
226 oprnd
= gimple_op (stmt
, op_idx
++);
228 oprnd_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, i
);
230 if (COMPARISON_CLASS_P (oprnd
))
232 compare_rhs
= TREE_OPERAND (oprnd
, 1);
233 oprnd
= TREE_OPERAND (oprnd
, 0);
236 if (!vect_is_simple_use (oprnd
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
238 || (!def_stmt
&& dt
!= vect_constant_def
))
240 if (vect_print_dump_info (REPORT_SLP
))
242 fprintf (vect_dump
, "Build SLP failed: can't find def for ");
243 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
249 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
250 from the pattern. Check that all the stmts of the node are in the
252 if (loop
&& def_stmt
&& gimple_bb (def_stmt
)
253 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
))
254 && vinfo_for_stmt (def_stmt
)
255 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
256 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
257 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
260 if (!first
&& !oprnd_info
->first_pattern
)
262 if (vect_print_dump_info (REPORT_DETAILS
))
264 fprintf (vect_dump
, "Build SLP failed: some of the stmts"
265 " are in a pattern, and others are not ");
266 print_generic_expr (vect_dump
, oprnd
, TDF_SLIM
);
272 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
273 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
275 if (dt
== vect_unknown_def_type
)
277 if (vect_print_dump_info (REPORT_DETAILS
))
278 fprintf (vect_dump
, "Unsupported pattern.");
282 switch (gimple_code (def_stmt
))
285 def
= gimple_phi_result (def_stmt
);
289 def
= gimple_assign_lhs (def_stmt
);
293 if (vect_print_dump_info (REPORT_DETAILS
))
294 fprintf (vect_dump
, "unsupported defining stmt: ");
301 oprnd_info
->first_dt
= dt
;
302 oprnd_info
->first_pattern
= pattern
;
305 oprnd_info
->first_def_type
= TREE_TYPE (def
);
306 oprnd_info
->first_const_oprnd
= NULL_TREE
;
310 oprnd_info
->first_def_type
= NULL_TREE
;
311 oprnd_info
->first_const_oprnd
= oprnd
;
318 /* Analyze costs (for the first stmt of the group only). */
319 if (REFERENCE_CLASS_P (lhs
))
321 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
324 /* Not memory operation (we don't call this function for
326 vect_model_simple_cost (stmt_info
, ncopies_for_cost
, &dt
,
332 /* Not first stmt of the group, check that the def-stmt/s match
333 the def-stmt/s of the first stmt. Allow different definition
334 types for reduction chains: the first stmt must be a
335 vect_reduction_def (a phi node), and the rest
336 vect_internal_def. */
337 if (((oprnd_info
->first_dt
!= dt
338 && !(oprnd_info
->first_dt
== vect_reduction_def
339 && dt
== vect_internal_def
))
340 || (oprnd_info
->first_def_type
!= NULL_TREE
342 && !types_compatible_p (oprnd_info
->first_def_type
,
345 && !types_compatible_p (TREE_TYPE (oprnd_info
->first_const_oprnd
),
349 if (number_of_oprnds
!= 2)
351 if (vect_print_dump_info (REPORT_SLP
))
352 fprintf (vect_dump
, "Build SLP failed: different types ");
357 /* Try to swap operands in case of binary operation. */
359 different_types
= true;
362 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
363 if (is_gimple_assign (stmt
)
364 && (rhs_code
= gimple_assign_rhs_code (stmt
))
365 && TREE_CODE_CLASS (rhs_code
) == tcc_binary
366 && commutative_tree_code (rhs_code
)
367 && oprnd0_info
->first_dt
== dt
368 && oprnd_info
->first_dt
== dt_op0
370 && !(oprnd0_info
->first_def_type
371 && !types_compatible_p (oprnd0_info
->first_def_type
,
373 && !(oprnd_info
->first_def_type
374 && !types_compatible_p (oprnd_info
->first_def_type
,
375 TREE_TYPE (def_op0
))))
377 if (vect_print_dump_info (REPORT_SLP
))
379 fprintf (vect_dump
, "Swapping operands of ");
380 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
383 swap_tree_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
384 gimple_assign_rhs2_ptr (stmt
));
388 if (vect_print_dump_info (REPORT_SLP
))
389 fprintf (vect_dump
, "Build SLP failed: different types ");
397 /* Check the types of the definitions. */
400 case vect_constant_def
:
401 case vect_external_def
:
402 case vect_reduction_def
:
405 case vect_internal_def
:
408 oprnd0_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
409 oprnd1_info
= VEC_index (slp_oprnd_info
, *oprnds_info
, 0);
411 VEC_quick_push (gimple
, oprnd1_info
->def_stmts
, def_stmt
);
413 VEC_quick_push (gimple
, oprnd0_info
->def_stmts
, def_stmt
);
416 VEC_quick_push (gimple
, oprnd_info
->def_stmts
, def_stmt
);
421 /* FORNOW: Not supported. */
422 if (vect_print_dump_info (REPORT_SLP
))
424 fprintf (vect_dump
, "Build SLP failed: illegal type of def ");
425 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
436 /* Recursively build an SLP tree starting from NODE.
437 Fail (and return FALSE) if def-stmts are not isomorphic, require data
438 permutation or are of unsupported types of operation. Otherwise, return
/* Recursively builds the SLP tree rooted at *NODE.  For every scalar
   statement in the node it validates vectorizability (data type,
   call properties, operation code equal to the first statement's,
   identical scalar shift operands when required, matching strided
   access groups, supported alignment of first loads), collects the
   per-operand def statements via vect_get_and_check_slp_defs, then
   recurses on each internal-def operand to create child nodes.
   Groups of strided loads terminate the recursion and are recorded
   in *LOADS / *LOAD_PERMUTATION; costs are accumulated into
   *INSIDE_COST / *OUTSIDE_COST and *MAX_NUNITS tracks the largest
   vector unit count seen.  On any failure the operand info is freed
   and the function bails out.
   NOTE(review): a significant number of lines (declarations of nops,
   i, lhs, cond, dummy, optab, icode, child; braces; the false/true
   return statements) are missing from this extraction -- consult the
   complete source before making any change here.  */
442 vect_build_slp_tree (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
443 slp_tree
*node
, unsigned int group_size
,
444 int *inside_cost
, int *outside_cost
,
445 int ncopies_for_cost
, unsigned int *max_nunits
,
446 VEC (int, heap
) **load_permutation
,
447 VEC (slp_tree
, heap
) **loads
,
448 unsigned int vectorization_factor
, bool *loads_permuted
)
451 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (*node
);
452 gimple stmt
= VEC_index (gimple
, stmts
, 0);
453 enum tree_code first_stmt_code
= ERROR_MARK
, rhs_code
= ERROR_MARK
;
454 enum tree_code first_cond_code
= ERROR_MARK
;
456 bool stop_recursion
= false, need_same_oprnds
= false;
457 tree vectype
, scalar_type
, first_op1
= NULL_TREE
;
458 unsigned int ncopies
;
461 enum machine_mode optab_op2_mode
;
462 enum machine_mode vec_mode
;
463 struct data_reference
*first_dr
;
465 bool permutation
= false;
466 unsigned int load_place
;
467 gimple first_load
, prev_first_load
= NULL
;
468 VEC (slp_oprnd_info
, heap
) *oprnds_info
;
470 slp_oprnd_info oprnd_info
;
473 if (is_gimple_call (stmt
))
474 nops
= gimple_call_num_args (stmt
);
475 else if (is_gimple_assign (stmt
))
477 nops
= gimple_num_ops (stmt
) - 1;
478 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
484 oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
486 /* For every stmt in NODE find its def stmt/s. */
487 FOR_EACH_VEC_ELT (gimple
, stmts
, i
, stmt
)
489 if (vect_print_dump_info (REPORT_SLP
))
491 fprintf (vect_dump
, "Build SLP for ");
492 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
495 /* Fail to vectorize statements marked as unvectorizable. */
496 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
498 if (vect_print_dump_info (REPORT_SLP
))
501 "Build SLP failed: unvectorizable statement ");
502 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
505 vect_free_oprnd_info (&oprnds_info
);
509 lhs
= gimple_get_lhs (stmt
);
510 if (lhs
== NULL_TREE
)
512 if (vect_print_dump_info (REPORT_SLP
))
515 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
516 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
519 vect_free_oprnd_info (&oprnds_info
);
523 if (is_gimple_assign (stmt
)
524 && gimple_assign_rhs_code (stmt
) == COND_EXPR
525 && (cond
= gimple_assign_rhs1 (stmt
))
526 && !COMPARISON_CLASS_P (cond
))
528 if (vect_print_dump_info (REPORT_SLP
))
531 "Build SLP failed: condition is not comparison ");
532 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
535 vect_free_oprnd_info (&oprnds_info
);
539 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
540 vectype
= get_vectype_for_scalar_type (scalar_type
);
543 if (vect_print_dump_info (REPORT_SLP
))
545 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
546 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
549 vect_free_oprnd_info (&oprnds_info
);
553 /* In case of multiple types we need to detect the smallest type. */
554 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
556 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
558 vectorization_factor
= *max_nunits
;
561 ncopies
= vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
563 if (is_gimple_call (stmt
))
565 rhs_code
= CALL_EXPR
;
566 if (gimple_call_internal_p (stmt
)
567 || gimple_call_tail_p (stmt
)
568 || gimple_call_noreturn_p (stmt
)
569 || !gimple_call_nothrow_p (stmt
)
570 || gimple_call_chain (stmt
))
572 if (vect_print_dump_info (REPORT_SLP
))
575 "Build SLP failed: unsupported call type ");
576 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
579 vect_free_oprnd_info (&oprnds_info
);
584 rhs_code
= gimple_assign_rhs_code (stmt
);
586 /* Check the operation. */
589 first_stmt_code
= rhs_code
;
591 /* Shift arguments should be equal in all the packed stmts for a
592 vector shift with scalar shift operand. */
593 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
594 || rhs_code
== LROTATE_EXPR
595 || rhs_code
== RROTATE_EXPR
)
597 vec_mode
= TYPE_MODE (vectype
);
599 /* First see if we have a vector/vector shift. */
600 optab
= optab_for_tree_code (rhs_code
, vectype
,
604 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
606 /* No vector/vector shift, try for a vector/scalar shift. */
607 optab
= optab_for_tree_code (rhs_code
, vectype
,
612 if (vect_print_dump_info (REPORT_SLP
))
613 fprintf (vect_dump
, "Build SLP failed: no optab.");
614 vect_free_oprnd_info (&oprnds_info
);
617 icode
= (int) optab_handler (optab
, vec_mode
);
618 if (icode
== CODE_FOR_nothing
)
620 if (vect_print_dump_info (REPORT_SLP
))
621 fprintf (vect_dump
, "Build SLP failed: "
622 "op not supported by target.");
623 vect_free_oprnd_info (&oprnds_info
);
626 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
627 if (!VECTOR_MODE_P (optab_op2_mode
))
629 need_same_oprnds
= true;
630 first_op1
= gimple_assign_rhs2 (stmt
);
634 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
636 need_same_oprnds
= true;
637 first_op1
= gimple_assign_rhs2 (stmt
);
642 if (first_stmt_code
!= rhs_code
643 && (first_stmt_code
!= IMAGPART_EXPR
644 || rhs_code
!= REALPART_EXPR
)
645 && (first_stmt_code
!= REALPART_EXPR
646 || rhs_code
!= IMAGPART_EXPR
)
647 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
))
648 && (first_stmt_code
== ARRAY_REF
649 || first_stmt_code
== INDIRECT_REF
650 || first_stmt_code
== COMPONENT_REF
651 || first_stmt_code
== MEM_REF
)))
653 if (vect_print_dump_info (REPORT_SLP
))
656 "Build SLP failed: different operation in stmt ");
657 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
660 vect_free_oprnd_info (&oprnds_info
);
665 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
667 if (vect_print_dump_info (REPORT_SLP
))
670 "Build SLP failed: different shift arguments in ");
671 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
674 vect_free_oprnd_info (&oprnds_info
);
678 if (rhs_code
== CALL_EXPR
)
680 gimple first_stmt
= VEC_index (gimple
, stmts
, 0);
681 if (gimple_call_num_args (stmt
) != nops
682 || !operand_equal_p (gimple_call_fn (first_stmt
),
683 gimple_call_fn (stmt
), 0)
684 || gimple_call_fntype (first_stmt
)
685 != gimple_call_fntype (stmt
))
687 if (vect_print_dump_info (REPORT_SLP
))
690 "Build SLP failed: different calls in ");
691 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
694 vect_free_oprnd_info (&oprnds_info
);
700 /* Strided store or load. */
701 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt
)))
703 if (REFERENCE_CLASS_P (lhs
))
706 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
,
707 stmt
, ncopies_for_cost
,
708 (i
== 0), &oprnds_info
))
710 vect_free_oprnd_info (&oprnds_info
);
717 /* FORNOW: Check that there is no gap between the loads. */
718 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
719 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 0)
720 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
721 && GROUP_GAP (vinfo_for_stmt (stmt
)) != 1))
723 if (vect_print_dump_info (REPORT_SLP
))
725 fprintf (vect_dump
, "Build SLP failed: strided "
727 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
730 vect_free_oprnd_info (&oprnds_info
);
734 /* Check that the size of interleaved loads group is not
735 greater than the SLP group size. */
737 && GROUP_SIZE (vinfo_for_stmt (stmt
)) > ncopies
* group_size
)
739 if (vect_print_dump_info (REPORT_SLP
))
741 fprintf (vect_dump
, "Build SLP failed: the number of "
742 "interleaved loads is greater than"
743 " the SLP group size ");
744 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
747 vect_free_oprnd_info (&oprnds_info
);
751 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
754 /* Check that there are no loads from different interleaving
755 chains in the same node. The only exception is complex
757 if (prev_first_load
!= first_load
758 && rhs_code
!= REALPART_EXPR
759 && rhs_code
!= IMAGPART_EXPR
)
761 if (vect_print_dump_info (REPORT_SLP
))
763 fprintf (vect_dump
, "Build SLP failed: different "
764 "interleaving chains in one node ");
765 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
768 vect_free_oprnd_info (&oprnds_info
);
773 prev_first_load
= first_load
;
775 if (first_load
== stmt
)
777 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
778 if (vect_supportable_dr_alignment (first_dr
, false)
779 == dr_unaligned_unsupported
)
781 if (vect_print_dump_info (REPORT_SLP
))
783 fprintf (vect_dump
, "Build SLP failed: unsupported "
785 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
788 vect_free_oprnd_info (&oprnds_info
);
792 /* Analyze costs (for the first stmt in the group). */
793 vect_model_load_cost (vinfo_for_stmt (stmt
),
794 ncopies_for_cost
, false, *node
);
797 /* Store the place of this load in the interleaving chain. In
798 case that permutation is needed we later decide if a specific
799 permutation is supported. */
800 load_place
= vect_get_place_in_interleaving_chain (stmt
,
805 VEC_safe_push (int, heap
, *load_permutation
, load_place
);
807 /* We stop the tree when we reach a group of loads. */
808 stop_recursion
= true;
811 } /* Strided access. */
814 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
816 /* Not strided load. */
817 if (vect_print_dump_info (REPORT_SLP
))
819 fprintf (vect_dump
, "Build SLP failed: not strided load ");
820 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
823 /* FORNOW: Not strided loads are not supported. */
824 vect_free_oprnd_info (&oprnds_info
);
828 /* Not memory operation. */
829 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
830 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
831 && rhs_code
!= COND_EXPR
832 && rhs_code
!= CALL_EXPR
)
834 if (vect_print_dump_info (REPORT_SLP
))
836 fprintf (vect_dump
, "Build SLP failed: operation");
837 fprintf (vect_dump
, " unsupported ");
838 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
841 vect_free_oprnd_info (&oprnds_info
);
845 if (rhs_code
== COND_EXPR
)
847 tree cond_expr
= gimple_assign_rhs1 (stmt
);
850 first_cond_code
= TREE_CODE (cond_expr
);
851 else if (first_cond_code
!= TREE_CODE (cond_expr
))
853 if (vect_print_dump_info (REPORT_SLP
))
855 fprintf (vect_dump
, "Build SLP failed: different"
857 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
860 vect_free_oprnd_info (&oprnds_info
);
865 /* Find the def-stmts. */
866 if (!vect_get_and_check_slp_defs (loop_vinfo
, bb_vinfo
, *node
, stmt
,
867 ncopies_for_cost
, (i
== 0),
870 vect_free_oprnd_info (&oprnds_info
);
876 /* Add the costs of the node to the overall instance costs. */
877 *inside_cost
+= SLP_TREE_INSIDE_OF_LOOP_COST (*node
);
878 *outside_cost
+= SLP_TREE_OUTSIDE_OF_LOOP_COST (*node
);
880 /* Strided loads were reached - stop the recursion. */
883 VEC_safe_push (slp_tree
, heap
, *loads
, *node
);
887 *loads_permuted
= true;
889 += targetm
.vectorize
.builtin_vectorization_cost (vec_perm
, NULL
, 0)
894 /* We don't check here complex numbers chains, so we set
895 LOADS_PERMUTED for further check in
896 vect_supported_load_permutation_p. */
897 if (rhs_code
== REALPART_EXPR
|| rhs_code
== IMAGPART_EXPR
)
898 *loads_permuted
= true;
901 vect_free_oprnd_info (&oprnds_info
);
905 /* Create SLP_TREE nodes for the definition node/s. */
906 FOR_EACH_VEC_ELT (slp_oprnd_info
, oprnds_info
, i
, oprnd_info
)
910 if (oprnd_info
->first_dt
!= vect_internal_def
)
913 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
915 || !vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &child
, group_size
,
916 inside_cost
, outside_cost
, ncopies_for_cost
,
917 max_nunits
, load_permutation
, loads
,
918 vectorization_factor
, loads_permuted
))
921 oprnd_info
->def_stmts
= NULL
;
922 vect_free_slp_tree (child
);
923 vect_free_oprnd_info (&oprnds_info
);
927 oprnd_info
->def_stmts
= NULL
;
928 VEC_quick_push (slp_void_p
, SLP_TREE_CHILDREN (*node
), child
);
931 vect_free_oprnd_info (&oprnds_info
);
/* Dumps NODE to vect_dump: the header "node ", each scalar statement
   with its index, then recurses into every child.
   NOTE(review): incomplete extraction -- iterator declarations,
   braces and an early-exit guard appear to be missing.  */
937 vect_print_slp_tree (slp_tree node
)
946 fprintf (vect_dump
, "node ");
947 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
949 fprintf (vect_dump
, "\n\tstmt %d ", i
);
950 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
952 fprintf (vect_dump
, "\n");
954 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
955 vect_print_slp_tree ((slp_tree
) child
);
959 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
960 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
961 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
962 stmts in NODE are to be marked. */
/* Sets STMT_SLP_TYPE to MARK on the scalar statements of NODE (per the
   file comment above: all of them for PURE_SLP/J == -1, otherwise the
   stmt at index J) and recurses into the children.
   NOTE(review): incomplete extraction -- the J-based selection logic
   visible in the comment above is not present in these lines.  */
965 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
974 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
976 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
978 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
979 vect_mark_slp_stmts ((slp_tree
) child
, mark
, j
);
983 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* Marks every scalar statement in the tree rooted at NODE as
   vect_used_in_scope, asserting each was previously either not
   relevant or already vect_used_in_scope; recurses into children.
   NOTE(review): incomplete extraction -- iterator declarations,
   braces and an early-exit guard appear to be missing.  */
986 vect_mark_slp_stmts_relevant (slp_tree node
)
990 stmt_vec_info stmt_info
;
996 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
998 stmt_info
= vinfo_for_stmt (stmt
);
999 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1000 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1001 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1004 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1005 vect_mark_slp_stmts_relevant ((slp_tree
) child
);
1009 /* Check if the permutation required by the SLP INSTANCE is supported.
1010 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
/* Checks whether INSTANCE's load permutation is supported (FORNOW:
   only same-location loads forming one interleaving chain).  Sorts
   the load nodes into chain order through the tmp_loads scratch
   array, replaces SLP_INSTANCE_LOADS with the sorted vector, and
   finally asks vect_transform_slp_perm_load (dry-run, NULL output)
   whether the permutation can be generated.
   NOTE(review): incomplete extraction -- declarations of index/load,
   the free of tmp_loads, braces and the return statements are
   missing from these lines.  */
1013 vect_supported_slp_permutation_p (slp_instance instance
)
1015 slp_tree node
= VEC_index (slp_tree
, SLP_INSTANCE_LOADS (instance
), 0);
1016 gimple stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1017 gimple first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
1018 VEC (slp_tree
, heap
) *sorted_loads
= NULL
;
1020 slp_tree
*tmp_loads
= NULL
;
1021 int group_size
= SLP_INSTANCE_GROUP_SIZE (instance
), i
, j
;
1024 /* FORNOW: The only supported loads permutation is loads from the same
1025 location in all the loads in the node, when the data-refs in
1026 nodes of LOADS constitute an interleaving chain.
1027 Sort the nodes according to the order of accesses in the chain. */
1028 tmp_loads
= (slp_tree
*) xmalloc (sizeof (slp_tree
) * group_size
);
1030 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance
), i
, index
)
1031 && VEC_iterate (slp_tree
, SLP_INSTANCE_LOADS (instance
), j
, load
);
1032 i
+= group_size
, j
++)
1034 gimple scalar_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (load
), 0);
1035 /* Check that the loads are all in the same interleaving chain. */
1036 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt
)) != first_load
)
1038 if (vect_print_dump_info (REPORT_DETAILS
))
1040 fprintf (vect_dump
, "Build SLP failed: unsupported data "
1042 print_gimple_stmt (vect_dump
, scalar_stmt
, 0, TDF_SLIM
);
1049 tmp_loads
[index
] = load
;
1052 sorted_loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1053 for (i
= 0; i
< group_size
; i
++)
1054 VEC_safe_push (slp_tree
, heap
, sorted_loads
, tmp_loads
[i
]);
1056 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (instance
));
1057 SLP_INSTANCE_LOADS (instance
) = sorted_loads
;
1060 if (!vect_transform_slp_perm_load (stmt
, NULL
, NULL
,
1061 SLP_INSTANCE_UNROLLING_FACTOR (instance
),
1069 /* Rearrange the statements of NODE according to PERMUTATION. */
/* Recursively reorders the scalar statements of NODE (children first)
   according to PERMUTATION: builds a GROUP_SIZE-long vector of NULL
   slots, places each statement at its permuted index, then swaps the
   new vector in for SLP_TREE_SCALAR_STMTS.
   NOTE(review): incomplete extraction -- `stmt'/`child' declarations
   and braces are missing from these lines.  */
1072 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1073 VEC (int, heap
) *permutation
)
1076 VEC (gimple
, heap
) *tmp_stmts
;
1077 unsigned int index
, i
;
1083 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1084 vect_slp_rearrange_stmts ((slp_tree
) child
, group_size
, permutation
);
1086 gcc_assert (group_size
== VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
)));
1087 tmp_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1089 for (i
= 0; i
< group_size
; i
++)
1090 VEC_safe_push (gimple
, heap
, tmp_stmts
, NULL
);
1092 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1094 index
= VEC_index (int, permutation
, i
);
1095 VEC_replace (gimple
, tmp_stmts
, index
, stmt
);
1098 VEC_free (gimple
, heap
, SLP_TREE_SCALAR_STMTS (node
));
1099 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1103 /* Check if the required load permutation is supported.
1104 LOAD_PERMUTATION contains a list of indices of the loads.
1105 In SLP this permutation is relative to the order of strided stores that are
1106 the base of the SLP instance. */
/* Decides whether LOAD_PERMUTATION of SLP_INSTN is supported.  Handles
   several cases in turn: mixed complex (REALPART/IMAGPART) chains are
   validated specially and accepted; reductions (no data-ref in the
   root and no group) accept any permutation provided all groups use
   the same one with no duplicates or gaps (checked with an sbitmap),
   after which statements are rearranged via vect_slp_rearrange_stmts;
   basic-block SLP accepts contiguous subchains of an interleaving
   chain whose first loads are alignment-supported; otherwise only the
   0..01..1.. pattern of length GROUP_SIZE*GROUP_SIZE is accepted,
   confirmed by vect_supported_slp_permutation_p.
   NOTE(review): many interior lines (returns, braces, several loop
   headers and conditions) are missing from this extraction; do not
   edit this function without the complete source.  */
1109 vect_supported_load_permutation_p (slp_instance slp_instn
, int group_size
,
1110 VEC (int, heap
) *load_permutation
)
1112 int i
= 0, j
, prev
= -1, next
, k
, number_of_groups
;
1113 bool supported
, bad_permutation
= false;
1115 slp_tree node
, other_complex_node
;
1116 gimple stmt
, first
= NULL
, other_node_first
, load
, next_load
, first_load
;
1117 unsigned complex_numbers
= 0;
1118 struct data_reference
*dr
;
1119 bb_vec_info bb_vinfo
;
1121 /* FORNOW: permutations are only supported in SLP. */
1125 if (vect_print_dump_info (REPORT_SLP
))
1127 fprintf (vect_dump
, "Load permutation ");
1128 FOR_EACH_VEC_ELT (int, load_permutation
, i
, next
)
1129 fprintf (vect_dump
, "%d ", next
);
1132 /* In case of reduction every load permutation is allowed, since the order
1133 of the reduction statements is not important (as opposed to the case of
1134 strided stores). The only condition we need to check is that all the
1135 load nodes are of the same size and have the same permutation (and then
1136 rearrange all the nodes of the SLP instance according to this
1139 /* Check that all the load nodes are of the same size. */
1140 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1142 if (VEC_length (gimple
, SLP_TREE_SCALAR_STMTS (node
))
1143 != (unsigned) group_size
)
1146 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1147 if (is_gimple_assign (stmt
)
1148 && (gimple_assign_rhs_code (stmt
) == REALPART_EXPR
1149 || gimple_assign_rhs_code (stmt
) == IMAGPART_EXPR
))
1153 /* Complex operands can be swapped as following:
1154 real_c = real_b + real_a;
1155 imag_c = imag_a + imag_b;
1156 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
1157 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
1158 chains are mixed, they match the above pattern. */
1159 if (complex_numbers
)
1161 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1163 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, stmt
)
1169 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != first
)
1171 if (complex_numbers
!= 2)
1179 other_complex_node
= VEC_index (slp_tree
,
1180 SLP_INSTANCE_LOADS (slp_instn
), k
);
1181 other_node_first
= VEC_index (gimple
,
1182 SLP_TREE_SCALAR_STMTS (other_complex_node
), 0);
1184 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1185 != other_node_first
)
1193 /* We checked that this case ok, so there is no need to proceed with
1194 permutation tests. */
1195 if (complex_numbers
== 2)
1197 VEC_free (slp_tree
, heap
, SLP_INSTANCE_LOADS (slp_instn
));
1198 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1202 node
= SLP_INSTANCE_TREE (slp_instn
);
1203 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1204 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1205 instance, not all the loads belong to the same node or interleaving
1206 group. Hence, we need to divide them into groups according to
1208 number_of_groups
= VEC_length (int, load_permutation
) / group_size
;
1210 /* Reduction (there are no data-refs in the root).
1211 In reduction chain the order of the loads is important. */
1212 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1213 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1215 int first_group_load_index
;
1217 /* Compare all the permutation sequences to the first one. */
1218 for (i
= 1; i
< number_of_groups
; i
++)
1221 for (j
= i
* group_size
; j
< i
* group_size
+ group_size
; j
++)
1223 next
= VEC_index (int, load_permutation
, j
);
1224 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1226 if (next
!= first_group_load_index
)
1228 bad_permutation
= true;
1235 if (bad_permutation
)
1239 if (!bad_permutation
)
1241 /* Check that the loads in the first sequence are different and there
1242 are no gaps between them. */
1243 load_index
= sbitmap_alloc (group_size
);
1244 sbitmap_zero (load_index
);
1245 for (k
= 0; k
< group_size
; k
++)
1247 first_group_load_index
= VEC_index (int, load_permutation
, k
);
1248 if (TEST_BIT (load_index
, first_group_load_index
))
1250 bad_permutation
= true;
1254 SET_BIT (load_index
, first_group_load_index
);
1257 if (!bad_permutation
)
1258 for (k
= 0; k
< group_size
; k
++)
1259 if (!TEST_BIT (load_index
, k
))
1261 bad_permutation
= true;
1265 sbitmap_free (load_index
);
1268 if (!bad_permutation
)
1270 /* This permutation is valid for reduction. Since the order of the
1271 statements in the nodes is not important unless they are memory
1272 accesses, we can rearrange the statements in all the nodes
1273 according to the order of the loads. */
1274 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1276 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1281 /* In basic block vectorization we allow any subchain of an interleaving
1283 FORNOW: not supported in loop SLP because of realignment compications. */
1284 bb_vinfo
= STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
));
1285 bad_permutation
= false;
1286 /* Check that for every node in the instance teh loads form a subchain. */
1289 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1293 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1296 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
));
1298 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load
)))
1300 bad_permutation
= true;
1304 if (j
!= 0 && next_load
!= load
)
1306 bad_permutation
= true;
1310 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1313 if (bad_permutation
)
1317 /* Check that the alignment of the first load in every subchain, i.e.,
1318 the first statement in every load node, is supported. */
1319 if (!bad_permutation
)
1321 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1323 first_load
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
1325 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1327 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1328 if (vect_supportable_dr_alignment (dr
, false)
1329 == dr_unaligned_unsupported
)
1331 if (vect_print_dump_info (REPORT_SLP
))
1333 fprintf (vect_dump
, "unsupported unaligned load ");
1334 print_gimple_stmt (vect_dump
, first_load
, 0,
1337 bad_permutation
= true;
1343 if (!bad_permutation
)
1345 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn
));
1351 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1352 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1353 well (unless it's reduction). */
1354 if (VEC_length (int, load_permutation
)
1355 != (unsigned int) (group_size
* group_size
))
1359 load_index
= sbitmap_alloc (group_size
);
1360 sbitmap_zero (load_index
);
1361 for (j
= 0; j
< group_size
; j
++)
1363 for (i
= j
* group_size
, k
= 0;
1364 VEC_iterate (int, load_permutation
, i
, next
) && k
< group_size
;
1367 if (i
!= j
* group_size
&& next
!= prev
)
1376 if (TEST_BIT (load_index
, prev
))
1382 SET_BIT (load_index
, prev
);
1385 for (j
= 0; j
< group_size
; j
++)
1386 if (!TEST_BIT (load_index
, j
))
1389 sbitmap_free (load_index
);
1391 if (supported
&& i
== group_size
* group_size
1392 && vect_supported_slp_permutation_p (slp_instn
))
1399 /* Find the first load in the loop that belongs to INSTANCE.
1400 When loads are in several SLP nodes, there can be a case in which the first
1401 load does not appear in the first SLP node to be transformed, causing
1402 incorrect order of statements. Since we generate all the loads together,
1403 they must be inserted before the first load of the SLP instance and not
1404 before the first load of the first node of the instance. */
1407 vect_find_first_load_in_slp_instance (slp_instance instance
)
1411 gimple first_load
= NULL
, load
;
1413 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, load_node
)
1414 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1415 first_load
= get_earlier_stmt (load
, first_load
);
1421 /* Find the last store in SLP INSTANCE. */
1424 vect_find_last_store_in_slp_instance (slp_instance instance
)
1428 gimple last_store
= NULL
, store
;
1430 node
= SLP_INSTANCE_TREE (instance
);
1432 VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, store
);
1434 last_store
= get_later_stmt (store
, last_store
);
1440 /* Analyze an SLP instance starting from a group of strided stores. Call
1441 vect_build_slp_tree to build a tree of packed stmts if possible.
1442 Return FALSE if it's impossible to SLP any stmt in the loop. */
1445 vect_analyze_slp_instance (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
,
1448 slp_instance new_instance
;
1450 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1451 unsigned int unrolling_factor
= 1, nunits
;
1452 tree vectype
, scalar_type
= NULL_TREE
;
1454 unsigned int vectorization_factor
= 0;
1455 int inside_cost
= 0, outside_cost
= 0, ncopies_for_cost
, i
;
1456 unsigned int max_nunits
= 0;
1457 VEC (int, heap
) *load_permutation
;
1458 VEC (slp_tree
, heap
) *loads
;
1459 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1460 bool loads_permuted
= false;
1461 VEC (gimple
, heap
) *scalar_stmts
;
1463 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1467 scalar_type
= TREE_TYPE (DR_REF (dr
));
1468 vectype
= get_vectype_for_scalar_type (scalar_type
);
1472 gcc_assert (loop_vinfo
);
1473 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1476 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1480 gcc_assert (loop_vinfo
);
1481 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1482 group_size
= VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
));
1487 if (vect_print_dump_info (REPORT_SLP
))
1489 fprintf (vect_dump
, "Build SLP failed: unsupported data-type ");
1490 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
1496 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1498 vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1500 vectorization_factor
= nunits
;
1502 /* Calculate the unrolling factor. */
1503 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
1504 if (unrolling_factor
!= 1 && !loop_vinfo
)
1506 if (vect_print_dump_info (REPORT_SLP
))
1507 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1513 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1514 scalar_stmts
= VEC_alloc (gimple
, heap
, group_size
);
1516 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1518 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1521 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1522 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1523 VEC_safe_push (gimple
, heap
, scalar_stmts
,
1524 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1526 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1527 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1532 /* Collect reduction statements. */
1533 VEC (gimple
, heap
) *reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1534 for (i
= 0; VEC_iterate (gimple
, reductions
, i
, next
); i
++)
1535 VEC_safe_push (gimple
, heap
, scalar_stmts
, next
);
1538 node
= vect_create_new_slp_node (scalar_stmts
);
1540 /* Calculate the number of vector stmts to create based on the unrolling
1541 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1542 GROUP_SIZE / NUNITS otherwise. */
1543 ncopies_for_cost
= unrolling_factor
* group_size
/ nunits
;
1545 load_permutation
= VEC_alloc (int, heap
, group_size
* group_size
);
1546 loads
= VEC_alloc (slp_tree
, heap
, group_size
);
1548 /* Build the tree for the SLP instance. */
1549 if (vect_build_slp_tree (loop_vinfo
, bb_vinfo
, &node
, group_size
,
1550 &inside_cost
, &outside_cost
, ncopies_for_cost
,
1551 &max_nunits
, &load_permutation
, &loads
,
1552 vectorization_factor
, &loads_permuted
))
1554 /* Calculate the unrolling factor based on the smallest type. */
1555 if (max_nunits
> nunits
)
1556 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1559 if (unrolling_factor
!= 1 && !loop_vinfo
)
1561 if (vect_print_dump_info (REPORT_SLP
))
1562 fprintf (vect_dump
, "Build SLP failed: unrolling required in basic"
1567 /* Create a new SLP instance. */
1568 new_instance
= XNEW (struct _slp_instance
);
1569 SLP_INSTANCE_TREE (new_instance
) = node
;
1570 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1571 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1572 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance
) = outside_cost
;
1573 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance
) = inside_cost
;
1574 SLP_INSTANCE_LOADS (new_instance
) = loads
;
1575 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
) = NULL
;
1576 SLP_INSTANCE_LOAD_PERMUTATION (new_instance
) = load_permutation
;
1580 if (!vect_supported_load_permutation_p (new_instance
, group_size
,
1583 if (vect_print_dump_info (REPORT_SLP
))
1585 fprintf (vect_dump
, "Build SLP failed: unsupported load "
1587 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1590 vect_free_slp_instance (new_instance
);
1594 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance
)
1595 = vect_find_first_load_in_slp_instance (new_instance
);
1598 VEC_free (int, heap
, SLP_INSTANCE_LOAD_PERMUTATION (new_instance
));
1601 VEC_safe_push (slp_instance
, heap
,
1602 LOOP_VINFO_SLP_INSTANCES (loop_vinfo
),
1605 VEC_safe_push (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
),
1608 if (vect_print_dump_info (REPORT_SLP
))
1609 vect_print_slp_tree (node
);
1614 /* Failed to SLP. */
1615 /* Free the allocated memory. */
1616 vect_free_slp_tree (node
);
1617 VEC_free (int, heap
, load_permutation
);
1618 VEC_free (slp_tree
, heap
, loads
);
1624 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1625 trees of packed scalar stmts if SLP is possible. */
1628 vect_analyze_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1631 VEC (gimple
, heap
) *strided_stores
, *reductions
= NULL
, *reduc_chains
= NULL
;
1632 gimple first_element
;
1635 if (vect_print_dump_info (REPORT_SLP
))
1636 fprintf (vect_dump
, "=== vect_analyze_slp ===");
1640 strided_stores
= LOOP_VINFO_STRIDED_STORES (loop_vinfo
);
1641 reduc_chains
= LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
);
1642 reductions
= LOOP_VINFO_REDUCTIONS (loop_vinfo
);
1645 strided_stores
= BB_VINFO_STRIDED_STORES (bb_vinfo
);
1647 /* Find SLP sequences starting from groups of strided stores. */
1648 FOR_EACH_VEC_ELT (gimple
, strided_stores
, i
, first_element
)
1649 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1652 if (bb_vinfo
&& !ok
)
1654 if (vect_print_dump_info (REPORT_SLP
))
1655 fprintf (vect_dump
, "Failed to SLP the basic block.");
1661 && VEC_length (gimple
, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo
)) > 0)
1663 /* Find SLP sequences starting from reduction chains. */
1664 FOR_EACH_VEC_ELT (gimple
, reduc_chains
, i
, first_element
)
1665 if (vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
, first_element
))
1670 /* Don't try to vectorize SLP reductions if reduction chain was
1675 /* Find SLP sequences starting from groups of reductions. */
1676 if (loop_vinfo
&& VEC_length (gimple
, LOOP_VINFO_REDUCTIONS (loop_vinfo
)) > 1
1677 && vect_analyze_slp_instance (loop_vinfo
, bb_vinfo
,
1678 VEC_index (gimple
, reductions
, 0)))
1685 /* For each possible SLP instance decide whether to SLP it and calculate overall
1686 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1687 least one instance. */
1690 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1692 unsigned int i
, unrolling_factor
= 1;
1693 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1694 slp_instance instance
;
1695 int decided_to_slp
= 0;
1697 if (vect_print_dump_info (REPORT_SLP
))
1698 fprintf (vect_dump
, "=== vect_make_slp_decision ===");
1700 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1702 /* FORNOW: SLP if you can. */
1703 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1704 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1706 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1707 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1708 loop-based vectorization. Such stmts will be marked as HYBRID. */
1709 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1713 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1715 if (decided_to_slp
&& vect_print_dump_info (REPORT_SLP
))
1716 fprintf (vect_dump
, "Decided to SLP %d instances. Unrolling factor %d",
1717 decided_to_slp
, unrolling_factor
);
1719 return (decided_to_slp
> 0);
1723 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1724 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1727 vect_detect_hybrid_slp_stmts (slp_tree node
)
1730 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (node
);
1731 gimple stmt
= VEC_index (gimple
, stmts
, 0);
1732 imm_use_iterator imm_iter
;
1734 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1736 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1737 struct loop
*loop
= NULL
;
1738 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1739 basic_block bb
= NULL
;
1745 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1747 bb
= BB_VINFO_BB (bb_vinfo
);
1749 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1750 if (PURE_SLP_STMT (vinfo_for_stmt (stmt
))
1751 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1752 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1753 if (gimple_bb (use_stmt
)
1754 && ((loop
&& flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1755 || bb
== gimple_bb (use_stmt
))
1756 && (stmt_vinfo
= vinfo_for_stmt (use_stmt
))
1757 && !STMT_SLP_TYPE (stmt_vinfo
)
1758 && (STMT_VINFO_RELEVANT (stmt_vinfo
)
1759 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo
)))
1760 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1761 && STMT_VINFO_DEF_TYPE (stmt_vinfo
)
1762 == vect_reduction_def
))
1763 vect_mark_slp_stmts (node
, hybrid
, i
);
1765 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1766 vect_detect_hybrid_slp_stmts ((slp_tree
) child
);
1770 /* Find stmts that must be both vectorized and SLPed. */
1773 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
1776 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1777 slp_instance instance
;
1779 if (vect_print_dump_info (REPORT_SLP
))
1780 fprintf (vect_dump
, "=== vect_detect_hybrid_slp ===");
1782 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1783 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
));
1787 /* Create and initialize a new bb_vec_info struct for BB, as well as
1788 stmt_vec_info structs for all the stmts in it. */
1791 new_bb_vec_info (basic_block bb
)
1793 bb_vec_info res
= NULL
;
1794 gimple_stmt_iterator gsi
;
1796 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
1797 BB_VINFO_BB (res
) = bb
;
1799 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1801 gimple stmt
= gsi_stmt (gsi
);
1802 gimple_set_uid (stmt
, 0);
1803 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, NULL
, res
));
1806 BB_VINFO_STRIDED_STORES (res
) = VEC_alloc (gimple
, heap
, 10);
1807 BB_VINFO_SLP_INSTANCES (res
) = VEC_alloc (slp_instance
, heap
, 2);
1814 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1815 stmts in the basic block. */
1818 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
1821 gimple_stmt_iterator si
;
1826 bb
= BB_VINFO_BB (bb_vinfo
);
1828 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1830 gimple stmt
= gsi_stmt (si
);
1831 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1834 /* Free stmt_vec_info. */
1835 free_stmt_vec_info (stmt
);
1838 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo
));
1839 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
1840 VEC_free (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
));
1841 VEC_free (slp_instance
, heap
, BB_VINFO_SLP_INSTANCES (bb_vinfo
));
1847 /* Analyze statements contained in SLP tree node after recursively analyzing
1848 the subtree. Return TRUE if the operations are supported. */
1851 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo
, slp_tree node
)
1861 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
1862 if (!vect_slp_analyze_node_operations (bb_vinfo
, (slp_tree
) child
))
1865 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1867 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1868 gcc_assert (stmt_info
);
1869 gcc_assert (PURE_SLP_STMT (stmt_info
));
1871 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
1879 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1880 operations are supported. */
1883 vect_slp_analyze_operations (bb_vec_info bb_vinfo
)
1885 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1886 slp_instance instance
;
1889 for (i
= 0; VEC_iterate (slp_instance
, slp_instances
, i
, instance
); )
1891 if (!vect_slp_analyze_node_operations (bb_vinfo
,
1892 SLP_INSTANCE_TREE (instance
)))
1894 vect_free_slp_instance (instance
);
1895 VEC_ordered_remove (slp_instance
, slp_instances
, i
);
1901 if (!VEC_length (slp_instance
, slp_instances
))
1907 /* Check if vectorization of the basic block is profitable. */
1910 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
1912 VEC (slp_instance
, heap
) *slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
1913 slp_instance instance
;
1915 unsigned int vec_outside_cost
= 0, vec_inside_cost
= 0, scalar_cost
= 0;
1916 unsigned int stmt_cost
;
1918 gimple_stmt_iterator si
;
1919 basic_block bb
= BB_VINFO_BB (bb_vinfo
);
1920 stmt_vec_info stmt_info
= NULL
;
1921 tree dummy_type
= NULL
;
1924 /* Calculate vector costs. */
1925 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
1927 vec_outside_cost
+= SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance
);
1928 vec_inside_cost
+= SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
);
1931 /* Calculate scalar cost. */
1932 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
1934 stmt
= gsi_stmt (si
);
1935 stmt_info
= vinfo_for_stmt (stmt
);
1937 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
)
1938 || !PURE_SLP_STMT (stmt_info
))
1941 if (STMT_VINFO_DATA_REF (stmt_info
))
1943 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1944 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1945 (scalar_load
, dummy_type
, dummy
);
1947 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1948 (scalar_store
, dummy_type
, dummy
);
1951 stmt_cost
= targetm
.vectorize
.builtin_vectorization_cost
1952 (scalar_stmt
, dummy_type
, dummy
);
1954 scalar_cost
+= stmt_cost
;
1957 if (vect_print_dump_info (REPORT_COST
))
1959 fprintf (vect_dump
, "Cost model analysis: \n");
1960 fprintf (vect_dump
, " Vector inside of basic block cost: %d\n",
1962 fprintf (vect_dump
, " Vector outside of basic block cost: %d\n",
1964 fprintf (vect_dump
, " Scalar cost of basic block: %d", scalar_cost
);
1967 /* Vectorization is profitable if its cost is less than the cost of scalar
1969 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
1975 /* Check if the basic block can be vectorized. */
1978 vect_slp_analyze_bb_1 (basic_block bb
)
1980 bb_vec_info bb_vinfo
;
1981 VEC (ddr_p
, heap
) *ddrs
;
1982 VEC (slp_instance
, heap
) *slp_instances
;
1983 slp_instance instance
;
1986 int max_vf
= MAX_VECTORIZATION_FACTOR
;
1988 bb_vinfo
= new_bb_vec_info (bb
);
1992 if (!vect_analyze_data_refs (NULL
, bb_vinfo
, &min_vf
))
1994 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1995 fprintf (vect_dump
, "not vectorized: unhandled data-ref in basic "
1998 destroy_bb_vec_info (bb_vinfo
);
2002 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
2003 if (!VEC_length (ddr_p
, ddrs
))
2005 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2006 fprintf (vect_dump
, "not vectorized: not enough data-refs in basic "
2009 destroy_bb_vec_info (bb_vinfo
);
2013 if (!vect_analyze_data_ref_dependences (NULL
, bb_vinfo
, &max_vf
)
2016 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2017 fprintf (vect_dump
, "not vectorized: unhandled data dependence "
2018 "in basic block.\n");
2020 destroy_bb_vec_info (bb_vinfo
);
2024 if (!vect_analyze_data_refs_alignment (NULL
, bb_vinfo
))
2026 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2027 fprintf (vect_dump
, "not vectorized: bad data alignment in basic "
2030 destroy_bb_vec_info (bb_vinfo
);
2034 if (!vect_analyze_data_ref_accesses (NULL
, bb_vinfo
))
2036 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2037 fprintf (vect_dump
, "not vectorized: unhandled data access in basic "
2040 destroy_bb_vec_info (bb_vinfo
);
2044 if (!vect_verify_datarefs_alignment (NULL
, bb_vinfo
))
2046 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2047 fprintf (vect_dump
, "not vectorized: unsupported alignment in basic "
2050 destroy_bb_vec_info (bb_vinfo
);
2054 /* Check the SLP opportunities in the basic block, analyze and build SLP
2056 if (!vect_analyze_slp (NULL
, bb_vinfo
))
2058 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2059 fprintf (vect_dump
, "not vectorized: failed to find SLP opportunities "
2060 "in basic block.\n");
2062 destroy_bb_vec_info (bb_vinfo
);
2066 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2068 /* Mark all the statements that we want to vectorize as pure SLP and
2070 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2072 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2073 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2076 if (!vect_slp_analyze_operations (bb_vinfo
))
2078 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2079 fprintf (vect_dump
, "not vectorized: bad operation in basic block.\n");
2081 destroy_bb_vec_info (bb_vinfo
);
2085 /* Cost model: check if the vectorization is worthwhile. */
2086 if (flag_vect_cost_model
2087 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2089 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2090 fprintf (vect_dump
, "not vectorized: vectorization is not "
2093 destroy_bb_vec_info (bb_vinfo
);
2097 if (vect_print_dump_info (REPORT_DETAILS
))
2098 fprintf (vect_dump
, "Basic block will be vectorized using SLP\n");
2105 vect_slp_analyze_bb (basic_block bb
)
2107 bb_vec_info bb_vinfo
;
2109 gimple_stmt_iterator gsi
;
2110 unsigned int vector_sizes
;
2112 if (vect_print_dump_info (REPORT_DETAILS
))
2113 fprintf (vect_dump
, "===vect_slp_analyze_bb===\n");
2115 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2117 gimple stmt
= gsi_stmt (gsi
);
2118 if (!is_gimple_debug (stmt
)
2119 && !gimple_nop_p (stmt
)
2120 && gimple_code (stmt
) != GIMPLE_LABEL
)
2124 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2126 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2127 fprintf (vect_dump
, "not vectorized: too many instructions in basic "
2133 /* Autodetect first vector size we try. */
2134 current_vector_size
= 0;
2135 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2139 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2143 destroy_bb_vec_info (bb_vinfo
);
2145 vector_sizes
&= ~current_vector_size
;
2146 if (vector_sizes
== 0
2147 || current_vector_size
== 0)
2150 /* Try the next biggest vector size. */
2151 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2152 if (vect_print_dump_info (REPORT_DETAILS
))
2153 fprintf (vect_dump
, "***** Re-trying analysis with "
2154 "vector size %d\n", current_vector_size
);
2159 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2160 the number of created vector stmts depends on the unrolling factor).
2161 However, the actual number of vector stmts for every SLP node depends on
2162 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2163 should be updated. In this function we assume that the inside costs
2164 calculated in vect_model_xxx_cost are linear in ncopies. */
2167 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo
)
2169 unsigned int i
, vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2170 VEC (slp_instance
, heap
) *slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2171 slp_instance instance
;
2173 if (vect_print_dump_info (REPORT_SLP
))
2174 fprintf (vect_dump
, "=== vect_update_slp_costs_according_to_vf ===");
2176 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2177 /* We assume that costs are linear in ncopies. */
2178 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance
) *= vf
2179 / SLP_INSTANCE_UNROLLING_FACTOR (instance
);
2183 /* For constant and loop invariant defs of SLP_NODE this function returns
2184 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2185 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2186 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2187 REDUC_INDEX is the index of the reduction operand in the statements, unless
2191 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2192 VEC (tree
, heap
) **vec_oprnds
,
2193 unsigned int op_num
, unsigned int number_of_vectors
,
2196 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2197 gimple stmt
= VEC_index (gimple
, stmts
, 0);
2198 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2202 int j
, number_of_places_left_in_vector
;
2205 int group_size
= VEC_length (gimple
, stmts
);
2206 unsigned int vec_num
, i
;
2207 int number_of_copies
= 1;
2208 VEC (tree
, heap
) *voprnds
= VEC_alloc (tree
, heap
, number_of_vectors
);
2209 bool constant_p
, is_store
;
2210 tree neutral_op
= NULL
;
2211 enum tree_code code
= gimple_expr_code (stmt
);
2215 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2216 && reduc_index
!= -1)
2218 op_num
= reduc_index
- 1;
2219 op
= gimple_op (stmt
, reduc_index
);
2220 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2221 we need either neutral operands or the original operands. See
2222 get_initial_def_for_reduction() for details. */
2225 case WIDEN_SUM_EXPR
:
2231 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2232 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2234 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2239 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2240 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2242 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2247 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2252 def_stmt
= SSA_NAME_DEF_STMT (op
);
2253 loop
= (gimple_bb (stmt
))->loop_father
;
2254 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2255 loop_preheader_edge (loop
));
2263 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2266 op
= gimple_assign_rhs1 (stmt
);
2273 if (CONSTANT_CLASS_P (op
))
2278 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2279 gcc_assert (vector_type
);
2280 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2282 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2283 created vectors. It is greater than 1 if unrolling is performed.
2285 For example, we have two scalar operands, s1 and s2 (e.g., group of
2286 strided accesses of size two), while NUNITS is four (i.e., four scalars
2287 of this type can be packed in a vector). The output vector will contain
2288 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2291 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2292 containing the operands.
2294 For example, NUNITS is four as before, and the group size is 8
2295 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2296 {s5, s6, s7, s8}. */
2298 number_of_copies
= least_common_multiple (nunits
, group_size
) / group_size
;
2300 number_of_places_left_in_vector
= nunits
;
2301 for (j
= 0; j
< number_of_copies
; j
++)
2303 for (i
= group_size
- 1; VEC_iterate (gimple
, stmts
, i
, stmt
); i
--)
2306 op
= gimple_assign_rhs1 (stmt
);
2312 if (op_num
== 0 || op_num
== 1)
2314 tree cond
= gimple_assign_rhs1 (stmt
);
2315 op
= TREE_OPERAND (cond
, op_num
);
2320 op
= gimple_assign_rhs2 (stmt
);
2322 op
= gimple_assign_rhs3 (stmt
);
2327 op
= gimple_call_arg (stmt
, op_num
);
2331 op
= gimple_op (stmt
, op_num
+ 1);
2335 if (reduc_index
!= -1)
2337 loop
= (gimple_bb (stmt
))->loop_father
;
2338 def_stmt
= SSA_NAME_DEF_STMT (op
);
2342 /* Get the def before the loop. In reduction chain we have only
2343 one initial value. */
2344 if ((j
!= (number_of_copies
- 1)
2345 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2350 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2351 loop_preheader_edge (loop
));
2354 /* Create 'vect_ = {op0,op1,...,opn}'. */
2355 t
= tree_cons (NULL_TREE
, op
, t
);
2357 number_of_places_left_in_vector
--;
2359 if (number_of_places_left_in_vector
== 0)
2361 number_of_places_left_in_vector
= nunits
;
2364 vec_cst
= build_vector (vector_type
, t
);
2366 vec_cst
= build_constructor_from_list (vector_type
, t
);
2367 VEC_quick_push (tree
, voprnds
,
2368 vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
));
2374 /* Since the vectors are created in the reverse order, we should invert
2376 vec_num
= VEC_length (tree
, voprnds
);
2377 for (j
= vec_num
- 1; j
>= 0; j
--)
2379 vop
= VEC_index (tree
, voprnds
, j
);
2380 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2383 VEC_free (tree
, heap
, voprnds
);
2385 /* In case that VF is greater than the unrolling factor needed for the SLP
2386 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2387 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2388 to replicate the vectors. */
2389 while (number_of_vectors
> VEC_length (tree
, *vec_oprnds
))
2391 tree neutral_vec
= NULL
;
2396 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2398 VEC_quick_push (tree
, *vec_oprnds
, neutral_vec
);
2402 for (i
= 0; VEC_iterate (tree
, *vec_oprnds
, i
, vop
) && i
< vec_num
; i
++)
2403 VEC_quick_push (tree
, *vec_oprnds
, vop
);
2409 /* Get vectorized definitions from SLP_NODE that contains corresponding
2410 vectorized def-stmts. */
2413 vect_get_slp_vect_defs (slp_tree slp_node
, VEC (tree
,heap
) **vec_oprnds
)
2416 gimple vec_def_stmt
;
2419 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
));
2421 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2423 gcc_assert (vec_def_stmt
);
2424 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2425 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2430 /* Get vectorized definitions for SLP_NODE.
2431 If the scalar definitions are loop invariants or constants, collect them and
2432 call vect_get_constant_vectors() to create vector stmts.
2433 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2434 must be stored in the corresponding child of SLP_NODE, and we call
2435 vect_get_slp_vect_defs () to retrieve them. */
2438 vect_get_slp_defs (VEC (tree
, heap
) *ops
, slp_tree slp_node
,
2439 VEC (slp_void_p
, heap
) **vec_oprnds
, int reduc_index
)
2441 gimple first_stmt
, first_def
;
2442 int number_of_vects
= 0, i
;
2443 unsigned int child_index
= 0;
2444 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2445 slp_tree child
= NULL
;
2446 VEC (tree
, heap
) *vec_defs
;
2447 tree oprnd
, def_lhs
;
2448 bool vectorized_defs
;
2450 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
2451 FOR_EACH_VEC_ELT (tree
, ops
, i
, oprnd
)
2453 /* For each operand we check if it has vectorized definitions in a child
2454 node or we need to create them (for invariants and constants). We
2455 check if the LHS of the first stmt of the next child matches OPRND.
2456 If it does, we found the correct child. Otherwise, we call
2457 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2458 to check this child node for the next operand. */
2459 vectorized_defs
= false;
2460 if (VEC_length (slp_void_p
, SLP_TREE_CHILDREN (slp_node
)) > child_index
)
2462 child
= (slp_tree
) VEC_index (slp_void_p
,
2463 SLP_TREE_CHILDREN (slp_node
),
2465 first_def
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (child
), 0);
2467 /* In the end of a pattern sequence we have a use of the original stmt,
2468 so we need to compare OPRND with the original def. */
2469 if (is_pattern_stmt_p (vinfo_for_stmt (first_def
))
2470 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt
))
2471 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt
)))
2472 first_def
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2474 if (is_gimple_call (first_def
))
2475 def_lhs
= gimple_call_lhs (first_def
);
2477 def_lhs
= gimple_assign_lhs (first_def
);
2479 if (operand_equal_p (oprnd
, def_lhs
, 0))
2481 /* The number of vector defs is determined by the number of
2482 vector statements in the node from which we get those
2484 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2485 vectorized_defs
= true;
2490 if (!vectorized_defs
)
2494 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2495 /* Number of vector stmts was calculated according to LHS in
2496 vect_schedule_slp_instance (), fix it by replacing LHS with
2497 RHS, if necessary. See vect_get_smallest_scalar_type () for
2499 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2501 if (rhs_size_unit
!= lhs_size_unit
)
2503 number_of_vects
*= rhs_size_unit
;
2504 number_of_vects
/= lhs_size_unit
;
2509 /* Allocate memory for vectorized defs. */
2510 vec_defs
= VEC_alloc (tree
, heap
, number_of_vects
);
2512 /* For reduction defs we call vect_get_constant_vectors (), since we are
2513 looking for initial loop invariant values. */
2514 if (vectorized_defs
&& reduc_index
== -1)
2515 /* The defs are already vectorized. */
2516 vect_get_slp_vect_defs (child
, &vec_defs
);
2518 /* Build vectors from scalar defs. */
2519 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2520 number_of_vects
, reduc_index
);
2522 VEC_quick_push (slp_void_p
, *vec_oprnds
, (slp_void_p
) vec_defs
);
2524 /* For reductions, we only need initial values. */
2525 if (reduc_index
!= -1)
2531 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2532 building a vector of type MASK_TYPE from it) and two input vectors placed in
2533 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2534 shifting by STRIDE elements of DR_CHAIN for every copy.
2535 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2537 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2538 the created stmts must be inserted. */
2541 vect_create_mask_and_perm (gimple stmt
, gimple next_scalar_stmt
,
2542 tree mask
, int first_vec_indx
, int second_vec_indx
,
2543 gimple_stmt_iterator
*gsi
, slp_tree node
,
2544 tree vectype
, VEC(tree
,heap
) *dr_chain
,
2545 int ncopies
, int vect_stmts_counter
)
2548 gimple perm_stmt
= NULL
;
2549 stmt_vec_info next_stmt_info
;
2551 tree first_vec
, second_vec
, data_ref
;
2553 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
2555 /* Initialize the vect stmts of NODE to properly insert the generated
2557 for (i
= VEC_length (gimple
, SLP_TREE_VEC_STMTS (node
));
2558 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
2559 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (node
), NULL
);
2561 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
2562 for (i
= 0; i
< ncopies
; i
++)
2564 first_vec
= VEC_index (tree
, dr_chain
, first_vec_indx
);
2565 second_vec
= VEC_index (tree
, dr_chain
, second_vec_indx
);
2567 /* Generate the permute statement. */
2568 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, perm_dest
,
2569 first_vec
, second_vec
, mask
);
2570 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
2571 gimple_set_lhs (perm_stmt
, data_ref
);
2572 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
2574 /* Store the vector statement in NODE. */
2575 VEC_replace (gimple
, SLP_TREE_VEC_STMTS (node
),
2576 stride
* i
+ vect_stmts_counter
, perm_stmt
);
2578 first_vec_indx
+= stride
;
2579 second_vec_indx
+= stride
;
2582 /* Mark the scalar stmt as vectorized. */
2583 next_stmt_info
= vinfo_for_stmt (next_scalar_stmt
);
2584 STMT_VINFO_VEC_STMT (next_stmt_info
) = perm_stmt
;
2588 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2589 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2590 representation. Check that the mask is valid and return FALSE if not.
2591 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2592 the next vector, i.e., the current first vector is not needed. */
2595 vect_get_mask_element (gimple stmt
, int first_mask_element
, int m
,
2596 int mask_nunits
, bool only_one_vec
, int index
,
2597 unsigned char *mask
, int *current_mask_element
,
2598 bool *need_next_vector
, int *number_of_mask_fixes
,
2599 bool *mask_fixed
, bool *needs_first_vector
)
2603 /* Convert to target specific representation. */
2604 *current_mask_element
= first_mask_element
+ m
;
2605 /* Adjust the value in case it's a mask for second and third vectors. */
2606 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
2608 if (*current_mask_element
< mask_nunits
)
2609 *needs_first_vector
= true;
2611 /* We have only one input vector to permute but the mask accesses values in
2612 the next vector as well. */
2613 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
2615 if (vect_print_dump_info (REPORT_DETAILS
))
2617 fprintf (vect_dump
, "permutation requires at least two vectors ");
2618 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2624 /* The mask requires the next vector. */
2625 if (*current_mask_element
>= mask_nunits
* 2)
2627 if (*needs_first_vector
|| *mask_fixed
)
2629 /* We either need the first vector too or have already moved to the
2630 next vector. In both cases, this permutation needs three
2632 if (vect_print_dump_info (REPORT_DETAILS
))
2634 fprintf (vect_dump
, "permutation requires at "
2635 "least three vectors ");
2636 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2642 /* We move to the next vector, dropping the first one and working with
2643 the second and the third - we need to adjust the values of the mask
2645 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
2647 for (i
= 0; i
< index
; i
++)
2648 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
2650 (*number_of_mask_fixes
)++;
2654 *need_next_vector
= *mask_fixed
;
2656 /* This was the last element of this mask. Start a new one. */
2657 if (index
== mask_nunits
- 1)
2659 *number_of_mask_fixes
= 1;
2660 *mask_fixed
= false;
2661 *needs_first_vector
= false;
2668 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2669 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2670 permute statements for SLP_NODE_INSTANCE. */
2672 vect_transform_slp_perm_load (gimple stmt
, VEC (tree
, heap
) *dr_chain
,
2673 gimple_stmt_iterator
*gsi
, int vf
,
2674 slp_instance slp_node_instance
, bool analyze_only
)
2676 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2677 tree mask_element_type
= NULL_TREE
, mask_type
;
2678 int i
, j
, k
, nunits
, vec_index
= 0, scalar_index
;
2680 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2681 gimple next_scalar_stmt
;
2682 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
2683 int first_mask_element
;
2684 int index
, unroll_factor
, current_mask_element
, ncopies
;
2685 unsigned char *mask
;
2686 bool only_one_vec
= false, need_next_vector
= false;
2687 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
2688 int number_of_mask_fixes
= 1;
2689 bool mask_fixed
= false;
2690 bool needs_first_vector
= false;
2691 enum machine_mode mode
;
2693 mode
= TYPE_MODE (vectype
);
2695 if (!can_vec_perm_p (mode
, false, NULL
))
2697 if (vect_print_dump_info (REPORT_DETAILS
))
2699 fprintf (vect_dump
, "no vect permute for ");
2700 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2705 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2706 same size as the vector element being permuted. */
2708 = lang_hooks
.types
.type_for_size
2709 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype
))), 1);
2710 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
2711 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2712 mask
= XALLOCAVEC (unsigned char, nunits
);
2713 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2715 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2716 unrolling factor. */
2717 orig_vec_stmts_num
= group_size
*
2718 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
) / nunits
;
2719 if (orig_vec_stmts_num
== 1)
2720 only_one_vec
= true;
2722 /* Number of copies is determined by the final vectorization factor
2723 relatively to SLP_NODE_INSTANCE unrolling factor. */
2724 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
2726 /* Generate permutation masks for every NODE. Number of masks for each NODE
2727 is equal to GROUP_SIZE.
2728 E.g., we have a group of three nodes with three loads from the same
2729 location in each node, and the vector size is 4. I.e., we have a
2730 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2731 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2732 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2735 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2736 The last mask is illegal since we assume two operands for permute
2737 operation, and the mask element values can't be outside that range.
2738 Hence, the last mask must be converted into {2,5,5,5}.
2739 For the first two permutations we need the first and the second input
2740 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2741 we need the second and the third vectors: {b1,c1,a2,b2} and
2744 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (slp_node_instance
), i
, node
)
2748 vect_stmts_counter
= 0;
2750 first_vec_index
= vec_index
++;
2752 second_vec_index
= first_vec_index
;
2754 second_vec_index
= vec_index
++;
2756 for (j
= 0; j
< unroll_factor
; j
++)
2758 for (k
= 0; k
< group_size
; k
++)
2760 first_mask_element
= i
+ j
* group_size
;
2761 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
2762 nunits
, only_one_vec
, index
,
2763 mask
, ¤t_mask_element
,
2765 &number_of_mask_fixes
, &mask_fixed
,
2766 &needs_first_vector
))
2768 mask
[index
++] = current_mask_element
;
2770 if (index
== nunits
)
2772 tree mask_vec
= NULL
;
2774 if (!can_vec_perm_p (mode
, false, mask
))
2776 if (vect_print_dump_info (REPORT_DETAILS
))
2778 fprintf (vect_dump
, "unsupported vect permute { ");
2779 for (i
= 0; i
< nunits
; ++i
)
2780 fprintf (vect_dump
, "%d ", mask
[i
]);
2781 fprintf (vect_dump
, "}\n");
2786 while (--index
>= 0)
2788 tree t
= build_int_cst (mask_element_type
, mask
[index
]);
2789 mask_vec
= tree_cons (NULL
, t
, mask_vec
);
2791 mask_vec
= build_vector (mask_type
, mask_vec
);
2796 if (need_next_vector
)
2798 first_vec_index
= second_vec_index
;
2799 second_vec_index
= vec_index
;
2802 next_scalar_stmt
= VEC_index (gimple
,
2803 SLP_TREE_SCALAR_STMTS (node
), scalar_index
++);
2805 vect_create_mask_and_perm (stmt
, next_scalar_stmt
,
2806 mask_vec
, first_vec_index
, second_vec_index
,
2807 gsi
, node
, vectype
, dr_chain
,
2808 ncopies
, vect_stmts_counter
++);
2820 /* Vectorize SLP instance tree in postorder. */
2823 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
2824 unsigned int vectorization_factor
)
2827 bool strided_store
, is_store
;
2828 gimple_stmt_iterator si
;
2829 stmt_vec_info stmt_info
;
2830 unsigned int vec_stmts_size
, nunits
, group_size
;
2833 slp_tree loads_node
;
2839 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2840 vect_schedule_slp_instance ((slp_tree
) child
, instance
,
2841 vectorization_factor
);
2843 stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (node
), 0);
2844 stmt_info
= vinfo_for_stmt (stmt
);
2846 /* VECTYPE is the type of the destination. */
2847 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2848 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
2849 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2851 /* For each SLP instance calculate number of vector stmts to be created
2852 for the scalar stmts in each node of the SLP tree. Number of vector
2853 elements in one vector iteration is the number of scalar elements in
2854 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2856 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
2858 /* In case of load permutation we have to allocate vectorized statements for
2859 all the nodes that participate in that permutation. */
2860 if (SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2862 FOR_EACH_VEC_ELT (slp_tree
, SLP_INSTANCE_LOADS (instance
), i
, loads_node
)
2864 if (!SLP_TREE_VEC_STMTS (loads_node
))
2866 SLP_TREE_VEC_STMTS (loads_node
) = VEC_alloc (gimple
, heap
,
2868 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node
) = vec_stmts_size
;
2873 if (!SLP_TREE_VEC_STMTS (node
))
2875 SLP_TREE_VEC_STMTS (node
) = VEC_alloc (gimple
, heap
, vec_stmts_size
);
2876 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
2879 if (vect_print_dump_info (REPORT_DETAILS
))
2881 fprintf (vect_dump
, "------>vectorizing SLP node starting from: ");
2882 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2885 /* Loads should be inserted before the first load. */
2886 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance
)
2887 && STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2888 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt
))
2889 && SLP_INSTANCE_LOAD_PERMUTATION (instance
))
2890 si
= gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance
));
2891 else if (is_pattern_stmt_p (stmt_info
))
2892 si
= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2894 si
= gsi_for_stmt (stmt
);
2896 /* Stores should be inserted just before the last store. */
2897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2898 && REFERENCE_CLASS_P (gimple_get_lhs (stmt
)))
2900 gimple last_store
= vect_find_last_store_in_slp_instance (instance
);
2901 if (is_pattern_stmt_p (vinfo_for_stmt (last_store
)))
2902 last_store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store
));
2903 si
= gsi_for_stmt (last_store
);
2906 /* Mark the first element of the reduction chain as reduction to properly
2907 transform the node. In the analysis phase only the last element of the
2908 chain is marked as reduction. */
2909 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_STRIDED_ACCESS (stmt_info
)
2910 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
2912 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
2913 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
2916 is_store
= vect_transform_stmt (stmt
, &si
, &strided_store
, node
, instance
);
2920 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
2921 For loop vectorization this is done in vectorizable_call, but for SLP
2922 it needs to be deferred until end of vect_schedule_slp, because multiple
2923 SLP instances may refer to the same scalar stmt. */
2926 vect_remove_slp_scalar_calls (slp_tree node
)
2928 gimple stmt
, new_stmt
;
2929 gimple_stmt_iterator gsi
;
2933 stmt_vec_info stmt_info
;
2938 FOR_EACH_VEC_ELT (slp_void_p
, SLP_TREE_CHILDREN (node
), i
, child
)
2939 vect_remove_slp_scalar_calls ((slp_tree
) child
);
2941 FOR_EACH_VEC_ELT (gimple
, SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2943 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
2945 stmt_info
= vinfo_for_stmt (stmt
);
2946 if (stmt_info
== NULL
2947 || is_pattern_stmt_p (stmt_info
)
2948 || !PURE_SLP_STMT (stmt_info
))
2950 lhs
= gimple_call_lhs (stmt
);
2951 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2952 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2953 set_vinfo_for_stmt (stmt
, NULL
);
2954 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2955 gsi
= gsi_for_stmt (stmt
);
2956 gsi_replace (&gsi
, new_stmt
, false);
2957 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2961 /* Generate vector code for all SLP instances in the loop/basic block. */
2964 vect_schedule_slp (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2966 VEC (slp_instance
, heap
) *slp_instances
;
2967 slp_instance instance
;
2969 bool is_store
= false;
2973 slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2974 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2978 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2982 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2984 /* Schedule the tree of INSTANCE. */
2985 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
2987 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS
)
2988 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2989 fprintf (vect_dump
, "vectorizing stmts using SLP.");
2992 FOR_EACH_VEC_ELT (slp_instance
, slp_instances
, i
, instance
)
2994 slp_tree root
= SLP_INSTANCE_TREE (instance
);
2997 gimple_stmt_iterator gsi
;
2999 vect_remove_slp_scalar_calls (root
);
3001 for (j
= 0; VEC_iterate (gimple
, SLP_TREE_SCALAR_STMTS (root
), j
, store
)
3002 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3004 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3007 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3008 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3009 /* Free the attached stmt_vec_info and remove the stmt. */
3010 gsi
= gsi_for_stmt (store
);
3011 gsi_remove (&gsi
, true);
3012 free_stmt_vec_info (store
);
3020 /* Vectorize the basic block. */
3023 vect_slp_transform_bb (basic_block bb
)
3025 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3026 gimple_stmt_iterator si
;
3028 gcc_assert (bb_vinfo
);
3030 if (vect_print_dump_info (REPORT_DETAILS
))
3031 fprintf (vect_dump
, "SLPing BB\n");
3033 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3035 gimple stmt
= gsi_stmt (si
);
3036 stmt_vec_info stmt_info
;
3038 if (vect_print_dump_info (REPORT_DETAILS
))
3040 fprintf (vect_dump
, "------>SLPing statement: ");
3041 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3044 stmt_info
= vinfo_for_stmt (stmt
);
3045 gcc_assert (stmt_info
);
3047 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3048 if (STMT_SLP_TYPE (stmt_info
))
3050 vect_schedule_slp (NULL
, bb_vinfo
);
3055 mark_sym_for_renaming (gimple_vop (cfun
));
3056 /* The memory tags and pointers in vectorized statements need to
3057 have their SSA forms updated. FIXME, why can't this be delayed
3058 until all the loops have been transformed? */
3059 update_ssa (TODO_update_ssa
);
3061 if (vect_print_dump_info (REPORT_DETAILS
))
3062 fprintf (vect_dump
, "BASIC BLOCK VECTORIZED\n");
3064 destroy_bb_vec_info (bb_vinfo
);