/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

vect_free_slp_tree (slp_tree node)

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

/* Free the memory allocated for the SLP instance.  */

vect_free_slp_instance (slp_instance instance)

  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();

/* Create an SLP node for SCALAR_STMTS.  */

vect_create_new_slp_node (vec<gimple *> scalar_stmts)

  gimple *stmt = scalar_stmts[0];

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info

  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;

  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)

  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

/* Free operands info.  */

vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)

  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
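
/* As an illustration (example made up for exposition, assuming GROUP_GAP
   of a non-first member is its distance from the previous member): for a
   load group

       x0 = a[0];   <-- FIRST_STMT, leading GROUP_GAP 0
       x1 = a[1];   <-- GROUP_GAP 1
       x3 = a[3];   <-- GROUP_GAP 2, element a[2] is not loaded

   the place of the load from a[1] is 1 and the place of the load from
   a[3] is 1 + 2 = 3, i.e. the element index within the chain including
   any gaps.  */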
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)

  gimple *next_stmt = first_stmt;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
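
/* A worked example of the mechanism (concrete sizes chosen only for
   illustration): loading COUNT == 4 single-byte elements with 16-byte
   vectors gives ELT_BYTES == 4, so each group of 4 scalars is viewed as
   one 32-bit integer and NELTS == 4 such integers fit in a vector.  The
   two permutations checked below are then the low and high interleaves

       { 0, 4, 1, 5 }  and  { 2, 6, 3, 7 }

   of two such vectors of 32-bit elements.  */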
220 can_duplicate_and_interleave_p (unsigned int count
, machine_mode elt_mode
,
221 unsigned int *nvectors_out
,
222 tree
*vector_type_out
,
225 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
227 unsigned int nvectors
= 1;
230 scalar_int_mode int_mode
;
231 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
232 if (multiple_p (current_vector_size
, elt_bytes
, &nelts
)
233 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
235 tree int_type
= build_nonstandard_integer_type
236 (GET_MODE_BITSIZE (int_mode
), 1);
237 tree vector_type
= build_vector_type (int_type
, nelts
);
238 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
240 vec_perm_builder
sel1 (nelts
, 2, 3);
241 vec_perm_builder
sel2 (nelts
, 2, 3);
242 poly_int64 half_nelts
= exact_div (nelts
, 2);
243 for (unsigned int i
= 0; i
< 3; ++i
)
246 sel1
.quick_push (i
+ nelts
);
247 sel2
.quick_push (half_nelts
+ i
);
248 sel2
.quick_push (half_nelts
+ i
+ nelts
);
250 vec_perm_indices
indices1 (sel1
, 2, nelts
);
251 vec_perm_indices
indices2 (sel2
, 2, nelts
);
252 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
253 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
256 *nvectors_out
= nvectors
;
258 *vector_type_out
= vector_type
;
261 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
263 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
270 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
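
/* A small illustration of the *SWAP encoding (stmts made up for
   exposition): if the first stmt of the group is

       _1 = a_1 < b_1 ? c_1 : d_1;

   then a stmt  _2 = b_2 > a_2 ? c_2 : d_2  can be matched with
   *SWAP == 1 (the comparison operands are swapped), while
   _3 = a_3 >= b_3 ? d_3 : c_3  can be matched with *SWAP == 2 (the
   comparison code is inverted, which also swaps the two arms).  */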
289 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
290 vec
<gimple
*> stmts
, unsigned stmt_num
,
291 vec
<slp_oprnd_info
> *oprnds_info
)
293 gimple
*stmt
= stmts
[stmt_num
];
295 unsigned int i
, number_of_oprnds
;
297 enum vect_def_type dt
= vect_uninitialized_def
;
298 bool pattern
= false;
299 slp_oprnd_info oprnd_info
;
300 int first_op_idx
= 1;
301 bool commutative
= false;
302 bool first_op_cond
= false;
303 bool first
= stmt_num
== 0;
304 bool second
= stmt_num
== 1;
306 if (is_gimple_call (stmt
))
308 number_of_oprnds
= gimple_call_num_args (stmt
);
311 else if (is_gimple_assign (stmt
))
313 enum tree_code code
= gimple_assign_rhs_code (stmt
);
314 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
315 /* Swap can only be done for cond_expr if asked to, otherwise we
316 could result in different comparison code to the first stmt. */
317 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
318 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
320 first_op_cond
= true;
324 commutative
= commutative_tree_code (code
);
329 bool swapped
= (*swap
!= 0);
330 gcc_assert (!swapped
|| first_op_cond
);
331 for (i
= 0; i
< number_of_oprnds
; i
++)
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];
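          /* map[0] and map[1] select the two operands of the embedded
             comparison (swapped for *swap == 1); map[2] and map[3] select
             the two arms of the COND_EXPR (swapped for *swap == 2).  */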
341 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
343 oprnd
= gimple_op (stmt
, map
[i
]);
346 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
348 oprnd_info
= (*oprnds_info
)[i
];
350 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
352 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
355 "Build SLP failed: can't analyze def for ");
356 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
357 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
366 if (def_stmt
&& gimple_bb (def_stmt
)
367 && vect_stmt_in_region_p (vinfo
, def_stmt
)
368 && vinfo_for_stmt (def_stmt
)
369 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
370 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
371 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
374 if (!first
&& !oprnd_info
->first_pattern
375 /* Allow different pattern state for the defs of the
376 first stmt in reduction chains. */
377 && (oprnd_info
->first_dt
!= vect_reduction_def
378 || (!second
&& !oprnd_info
->second_pattern
)))
388 if (dump_enabled_p ())
390 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
391 "Build SLP failed: some of the stmts"
392 " are in a pattern, and others are not ");
393 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
394 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
400 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
401 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
403 if (dt
== vect_unknown_def_type
)
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
407 "Unsupported pattern.\n");
411 switch (gimple_code (def_stmt
))
418 if (dump_enabled_p ())
419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
420 "unsupported defining stmt:\n");
426 oprnd_info
->second_pattern
= pattern
;
430 oprnd_info
->first_dt
= dt
;
431 oprnd_info
->first_pattern
= pattern
;
432 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
436 /* Not first stmt of the group, check that the def-stmt/s match
437 the def-stmt/s of the first stmt. Allow different definition
438 types for reduction chains: the first stmt must be a
439 vect_reduction_def (a phi node), and the rest
440 vect_internal_def. */
441 tree type
= TREE_TYPE (oprnd
);
442 if ((oprnd_info
->first_dt
!= dt
443 && !(oprnd_info
->first_dt
== vect_reduction_def
444 && dt
== vect_internal_def
)
445 && !((oprnd_info
->first_dt
== vect_external_def
446 || oprnd_info
->first_dt
== vect_constant_def
)
447 && (dt
== vect_external_def
448 || dt
== vect_constant_def
)))
449 || !types_compatible_p (oprnd_info
->first_op_type
, type
))
451 /* Try swapping operands if we got a mismatch. */
460 if (dump_enabled_p ())
461 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
462 "Build SLP failed: different types\n");
466 if ((dt
== vect_constant_def
467 || dt
== vect_external_def
)
468 && !current_vector_size
.is_constant ()
469 && (TREE_CODE (type
) == BOOLEAN_TYPE
470 || !can_duplicate_and_interleave_p (stmts
.length (),
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
476 "Build SLP failed: invalid type of def "
477 "for variable-length SLP ");
478 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
479 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
485 /* Check the types of the definitions. */
488 case vect_constant_def
:
489 case vect_external_def
:
492 case vect_reduction_def
:
493 case vect_induction_def
:
494 case vect_internal_def
:
495 oprnd_info
->def_stmts
.quick_push (def_stmt
);
499 /* FORNOW: Not supported. */
500 if (dump_enabled_p ())
502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
503 "Build SLP failed: illegal type of def ");
504 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
505 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
515 /* If there are already uses of this stmt in a SLP instance then
516 we've committed to the operand order and can't swap it. */
517 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
519 if (dump_enabled_p ())
521 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
522 "Build SLP failed: cannot swap operands of "
524 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
531 tree cond
= gimple_assign_rhs1 (stmt
);
532 enum tree_code code
= TREE_CODE (cond
);
537 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
538 &TREE_OPERAND (cond
, 1));
539 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
544 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
545 gimple_assign_rhs3_ptr (stmt
));
546 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
547 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
548 gcc_assert (code
!= ERROR_MARK
);
549 TREE_SET_CODE (cond
, code
);
553 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
554 gimple_assign_rhs2_ptr (stmt
));
555 if (dump_enabled_p ())
557 dump_printf_loc (MSG_NOTE
, vect_location
,
558 "swapped operands to match def types in ");
559 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
575 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
576 tree vectype
, poly_uint64
*max_nunits
)
580 if (dump_enabled_p ())
582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
583 "Build SLP failed: unsupported data-type in ");
584 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
585 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
587 /* Fatal mismatch. */
591 /* If populating the vector type requires unrolling then fail
592 before adjusting *max_nunits for basic-block vectorization. */
593 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
594 unsigned HOST_WIDE_INT const_nunits
;
595 if (is_a
<bb_vec_info
> (vinfo
)
596 && (!nunits
.is_constant (&const_nunits
)
597 || const_nunits
> group_size
))
599 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
600 "Build SLP failed: unrolling required "
601 "in basic block SLP\n");
602 /* Fatal mismatch. */
606 /* In case of multiple types we need to detect the smallest type. */
607 vect_update_max_nunits (max_nunits
, vectype
);
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
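
/* A related situation handled below (example added for exposition,
   assuming four-element vectors): the group

       c[0] = a[0] + b[0];   c[1] = a[1] - b[1];
       c[2] = a[2] + b[2];   c[3] = a[3] - b[3];

   mixes PLUS_EXPR and MINUS_EXPR.  It is still accepted, with
   *TWO_OPERATORS set, provided the target can blend the two vector
   results with the permutation { 0, 5, 2, 7 }, where indices >= 4
   select from the MINUS result.  */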
626 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
627 vec
<gimple
*> stmts
, unsigned int group_size
,
628 unsigned nops
, poly_uint64
*max_nunits
,
629 bool *matches
, bool *two_operators
)
632 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
633 enum tree_code first_stmt_code
= ERROR_MARK
;
634 enum tree_code alt_stmt_code
= ERROR_MARK
;
635 enum tree_code rhs_code
= ERROR_MARK
;
636 enum tree_code first_cond_code
= ERROR_MARK
;
638 bool need_same_oprnds
= false;
639 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
642 machine_mode optab_op2_mode
;
643 machine_mode vec_mode
;
645 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
647 /* For every stmt in NODE find its def stmt/s. */
648 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
653 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
656 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
659 /* Fail to vectorize statements marked as unvectorizable. */
660 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
665 "Build SLP failed: unvectorizable statement ");
666 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
668 /* Fatal mismatch. */
673 lhs
= gimple_get_lhs (stmt
);
674 if (lhs
== NULL_TREE
)
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
679 "Build SLP failed: not GIMPLE_ASSIGN nor "
681 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
683 /* Fatal mismatch. */
688 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
689 vectype
= get_vectype_for_scalar_type (scalar_type
);
690 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
693 /* Fatal mismatch. */
698 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
700 rhs_code
= CALL_EXPR
;
701 if (gimple_call_internal_p (call_stmt
)
702 || gimple_call_tail_p (call_stmt
)
703 || gimple_call_noreturn_p (call_stmt
)
704 || !gimple_call_nothrow_p (call_stmt
)
705 || gimple_call_chain (call_stmt
))
707 if (dump_enabled_p ())
709 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
710 "Build SLP failed: unsupported call type ");
711 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
714 /* Fatal mismatch. */
720 rhs_code
= gimple_assign_rhs_code (stmt
);
722 /* Check the operation. */
725 first_stmt_code
= rhs_code
;
727 /* Shift arguments should be equal in all the packed stmts for a
728 vector shift with scalar shift operand. */
729 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
730 || rhs_code
== LROTATE_EXPR
731 || rhs_code
== RROTATE_EXPR
)
733 vec_mode
= TYPE_MODE (vectype
);
735 /* First see if we have a vector/vector shift. */
736 optab
= optab_for_tree_code (rhs_code
, vectype
,
740 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
742 /* No vector/vector shift, try for a vector/scalar shift. */
743 optab
= optab_for_tree_code (rhs_code
, vectype
,
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
750 "Build SLP failed: no optab.\n");
751 /* Fatal mismatch. */
755 icode
= (int) optab_handler (optab
, vec_mode
);
756 if (icode
== CODE_FOR_nothing
)
758 if (dump_enabled_p ())
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
761 "op not supported by target.\n");
762 /* Fatal mismatch. */
766 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
767 if (!VECTOR_MODE_P (optab_op2_mode
))
769 need_same_oprnds
= true;
770 first_op1
= gimple_assign_rhs2 (stmt
);
774 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
776 need_same_oprnds
= true;
777 first_op1
= gimple_assign_rhs2 (stmt
);
782 if (first_stmt_code
!= rhs_code
783 && alt_stmt_code
== ERROR_MARK
)
784 alt_stmt_code
= rhs_code
;
785 if (first_stmt_code
!= rhs_code
786 && (first_stmt_code
!= IMAGPART_EXPR
787 || rhs_code
!= REALPART_EXPR
)
788 && (first_stmt_code
!= REALPART_EXPR
789 || rhs_code
!= IMAGPART_EXPR
)
790 /* Handle mismatches in plus/minus by computing both
791 and merging the results. */
792 && !((first_stmt_code
== PLUS_EXPR
793 || first_stmt_code
== MINUS_EXPR
)
794 && (alt_stmt_code
== PLUS_EXPR
795 || alt_stmt_code
== MINUS_EXPR
)
796 && rhs_code
== alt_stmt_code
)
797 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
798 && (first_stmt_code
== ARRAY_REF
799 || first_stmt_code
== BIT_FIELD_REF
800 || first_stmt_code
== INDIRECT_REF
801 || first_stmt_code
== COMPONENT_REF
802 || first_stmt_code
== MEM_REF
)))
804 if (dump_enabled_p ())
806 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
807 "Build SLP failed: different operation "
809 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
812 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
820 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
822 if (dump_enabled_p ())
824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
825 "Build SLP failed: different shift "
827 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
833 if (rhs_code
== CALL_EXPR
)
835 gimple
*first_stmt
= stmts
[0];
836 if (gimple_call_num_args (stmt
) != nops
837 || !operand_equal_p (gimple_call_fn (first_stmt
),
838 gimple_call_fn (stmt
), 0)
839 || gimple_call_fntype (first_stmt
)
840 != gimple_call_fntype (stmt
))
842 if (dump_enabled_p ())
844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
845 "Build SLP failed: different calls in ");
846 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
855 /* Grouped store or load. */
856 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
858 if (REFERENCE_CLASS_P (lhs
))
866 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
869 /* Check that there are no loads from different interleaving
870 chains in the same node. */
871 if (prev_first_load
!= first_load
)
873 if (dump_enabled_p ())
875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
877 "Build SLP failed: different "
878 "interleaving chains in one node ");
879 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
887 prev_first_load
= first_load
;
889 } /* Grouped access. */
892 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
894 /* Not grouped load. */
895 if (dump_enabled_p ())
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
898 "Build SLP failed: not grouped load ");
899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
902 /* FORNOW: Not grouped loads are not supported. */
903 /* Fatal mismatch. */
908 /* Not memory operation. */
909 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
910 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
911 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
912 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
913 && rhs_code
!= CALL_EXPR
)
915 if (dump_enabled_p ())
917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
918 "Build SLP failed: operation");
919 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
920 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
922 /* Fatal mismatch. */
927 if (rhs_code
== COND_EXPR
)
929 tree cond_expr
= gimple_assign_rhs1 (stmt
);
930 enum tree_code cond_code
= TREE_CODE (cond_expr
);
931 enum tree_code swap_code
= ERROR_MARK
;
932 enum tree_code invert_code
= ERROR_MARK
;
935 first_cond_code
= TREE_CODE (cond_expr
);
936 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
938 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
939 swap_code
= swap_tree_comparison (cond_code
);
940 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
943 if (first_cond_code
== cond_code
)
945 /* Isomorphic can be achieved by swapping. */
946 else if (first_cond_code
== swap_code
)
948 /* Isomorphic can be achieved by inverting. */
949 else if (first_cond_code
== invert_code
)
953 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
956 "Build SLP failed: different"
958 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
970 for (i
= 0; i
< group_size
; ++i
)
974 /* If we allowed a two-operation SLP node verify the target can cope
975 with the permute we are going to use. */
976 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
977 if (alt_stmt_code
!= ERROR_MARK
978 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
980 unsigned HOST_WIDE_INT count
;
981 if (!nunits
.is_constant (&count
))
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
985 "Build SLP failed: different operations "
986 "not allowed with variable-length SLP.\n");
989 vec_perm_builder
sel (count
, count
, 1);
990 for (i
= 0; i
< count
; ++i
)
992 unsigned int elt
= i
;
993 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
995 sel
.quick_push (elt
);
997 vec_perm_indices
indices (sel
, 2, count
);
998 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1000 for (i
= 0; i
< group_size
; ++i
)
1001 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
1004 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1007 "Build SLP failed: different operation "
1009 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1013 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1019 *two_operators
= true;
1025 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1026 Note we never remove apart from at destruction time so we do not
1027 need a special value for deleted that differs from empty. */
1030 typedef vec
<gimple
*> value_type
;
1031 typedef vec
<gimple
*> compare_type
;
1032 static inline hashval_t
hash (value_type
);
1033 static inline bool equal (value_type existing
, value_type candidate
);
1034 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1035 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1036 static inline void mark_empty (value_type
&x
) { x
.release (); }
1037 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1038 static inline void remove (value_type
&x
) { x
.release (); }
1041 bst_traits::hash (value_type x
)
1044 for (unsigned i
= 0; i
< x
.length (); ++i
)
1045 h
.add_int (gimple_uid (x
[i
]));
1049 bst_traits::equal (value_type existing
, value_type candidate
)
1051 if (existing
.length () != candidate
.length ())
1053 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1054 if (existing
[i
] != candidate
[i
])
1059 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
1060 static scalar_stmts_set_t
*bst_fail
;
1063 vect_build_slp_tree_2 (vec_info
*vinfo
,
1064 vec
<gimple
*> stmts
, unsigned int group_size
,
1065 poly_uint64
*max_nunits
,
1066 vec
<slp_tree
> *loads
,
1067 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1068 unsigned max_tree_size
);
1071 vect_build_slp_tree (vec_info
*vinfo
,
1072 vec
<gimple
*> stmts
, unsigned int group_size
,
1073 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
1074 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1075 unsigned max_tree_size
)
1077 if (bst_fail
->contains (stmts
))
1079 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1080 loads
, matches
, npermutes
, tree_size
,
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow constructing parts from
     scalars, see PR81723.  */
1088 x
.create (stmts
.length ());
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */
1103 vect_build_slp_tree_2 (vec_info
*vinfo
,
1104 vec
<gimple
*> stmts
, unsigned int group_size
,
1105 poly_uint64
*max_nunits
,
1106 vec
<slp_tree
> *loads
,
1107 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1108 unsigned max_tree_size
)
1110 unsigned nops
, i
, this_tree_size
= 0;
1111 poly_uint64 this_max_nunits
= *max_nunits
;
1118 if (is_gimple_call (stmt
))
1119 nops
= gimple_call_num_args (stmt
);
1120 else if (is_gimple_assign (stmt
))
1122 nops
= gimple_num_ops (stmt
) - 1;
1123 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1126 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1131 /* If the SLP node is a PHI (induction or reduction), terminate
1133 if (gimple_code (stmt
) == GIMPLE_PHI
)
1135 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1136 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1137 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1141 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1142 /* Induction from different IVs is not supported. */
1143 if (def_type
== vect_induction_def
)
1145 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1146 if (stmt
!= stmts
[0])
1151 /* Else def types have to match. */
1152 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1154 /* But for reduction chains only check on the first stmt. */
1155 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1156 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1158 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1162 node
= vect_create_new_slp_node (stmts
);
1167 bool two_operators
= false;
1168 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1169 if (!vect_build_slp_tree_1 (vinfo
, swap
,
1170 stmts
, group_size
, nops
,
1171 &this_max_nunits
, matches
, &two_operators
))
1174 /* If the SLP node is a load, terminate the recursion. */
1175 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1176 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1178 *max_nunits
= this_max_nunits
;
1179 node
= vect_create_new_slp_node (stmts
);
1180 loads
->safe_push (node
);
1184 /* Get at the operands, verifying they are compatible. */
1185 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1186 slp_oprnd_info oprnd_info
;
1187 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1189 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1190 stmts
, i
, &oprnds_info
);
1192 matches
[(res
== -1) ? 0 : i
] = false;
1196 for (i
= 0; i
< group_size
; ++i
)
1199 vect_free_oprnd_info (oprnds_info
);
1203 auto_vec
<slp_tree
, 4> children
;
1204 auto_vec
<slp_tree
> this_loads
;
1209 max_tree_size
-= *tree_size
;
1211 /* Create SLP_TREE nodes for the definition node/s. */
1212 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1215 unsigned old_nloads
= this_loads
.length ();
1216 unsigned old_tree_size
= this_tree_size
;
1219 if (oprnd_info
->first_dt
!= vect_internal_def
1220 && oprnd_info
->first_dt
!= vect_reduction_def
1221 && oprnd_info
->first_dt
!= vect_induction_def
)
1224 if (++this_tree_size
> max_tree_size
)
1226 FOR_EACH_VEC_ELT (children
, j
, child
)
1227 vect_free_slp_tree (child
);
1228 vect_free_oprnd_info (oprnds_info
);
1232 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1233 group_size
, &this_max_nunits
,
1234 &this_loads
, matches
, npermutes
,
1236 max_tree_size
)) != NULL
)
1238 /* If we have all children of child built up from scalars then just
1239 throw that away and build it up this node from scalars. */
1240 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1241 /* ??? Rejecting patterns this way doesn't work. We'd have to
1242 do extra work to cancel the pattern so the uses see the
1244 && !is_pattern_stmt_p
1245 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1247 slp_tree grandchild
;
1249 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1250 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1255 this_loads
.truncate (old_nloads
);
1256 this_tree_size
= old_tree_size
;
1257 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1258 vect_free_slp_tree (grandchild
);
1259 SLP_TREE_CHILDREN (child
).truncate (0);
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "Building parent vector operands from "
1263 "scalars instead\n");
1264 oprnd_info
->def_stmts
= vNULL
;
1265 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1266 children
.safe_push (child
);
1271 oprnd_info
->def_stmts
= vNULL
;
1272 children
.safe_push (child
);
      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
1284 if (is_a
<bb_vec_info
> (vinfo
)
1286 /* ??? Rejecting patterns this way doesn't work. We'd have to
1287 do extra work to cancel the pattern so the uses see the
1289 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1291 dump_printf_loc (MSG_NOTE
, vect_location
,
1292 "Building vector operands from scalars\n");
1293 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1294 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1295 children
.safe_push (child
);
1296 oprnd_info
->def_stmts
= vNULL
;
1300 /* If the SLP build for operand zero failed and operand zero
1301 and one can be commutated try that for the scalar stmts
1302 that failed the match. */
1304 /* A first scalar stmt mismatch signals a fatal mismatch. */
1306 /* ??? For COND_EXPRs we can swap the comparison operands
1307 as well as the arms under some constraints. */
1309 && oprnds_info
[1]->first_dt
== vect_internal_def
1310 && is_gimple_assign (stmt
)
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
1317 /* See whether we can swap the matching or the non-matching
1319 bool swap_not_matching
= true;
1322 for (j
= 0; j
< group_size
; ++j
)
1324 if (matches
[j
] != !swap_not_matching
)
1326 gimple
*stmt
= stmts
[j
];
1327 /* Verify if we can swap operands of this stmt. */
1328 if (!is_gimple_assign (stmt
)
1329 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1331 if (!swap_not_matching
)
1333 swap_not_matching
= false;
1336 /* Verify if we can safely swap or if we committed to a
1337 specific operand order already.
1338 ??? Instead of modifying GIMPLE stmts here we could
1339 record whether we want to swap operands in the SLP
1340 node and temporarily do that when processing it
1341 (or wrap operand accessors in a helper). */
1342 else if (swap
[j
] != 0
1343 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)))
1345 if (!swap_not_matching
)
1347 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1351 "Build SLP failed: cannot swap "
1352 "operands of shared stmt ");
1353 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
1354 TDF_SLIM
, stmts
[j
], 0);
1358 swap_not_matching
= false;
1363 while (j
!= group_size
);
1365 /* Swap mismatched definition stmts. */
1366 dump_printf_loc (MSG_NOTE
, vect_location
,
1367 "Re-trying with swapped operands of stmts ");
1368 for (j
= 0; j
< group_size
; ++j
)
1369 if (matches
[j
] == !swap_not_matching
)
1371 std::swap (oprnds_info
[0]->def_stmts
[j
],
1372 oprnds_info
[1]->def_stmts
[j
]);
1373 dump_printf (MSG_NOTE
, "%d ", j
);
1375 dump_printf (MSG_NOTE
, "\n");
1376 /* And try again with scratch 'matches' ... */
1377 bool *tem
= XALLOCAVEC (bool, group_size
);
1378 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1379 group_size
, &this_max_nunits
,
1380 &this_loads
, tem
, npermutes
,
1382 max_tree_size
)) != NULL
)
1384 /* ... so if successful we can apply the operand swapping
1385 to the GIMPLE IL. This is necessary because for example
1386 vect_get_slp_defs uses operand indexes and thus expects
1387 canonical operand order. This is also necessary even
1388 if we end up building the operand from scalars as
1389 we'll continue to process swapped operand two. */
1390 for (j
= 0; j
< group_size
; ++j
)
1392 gimple
*stmt
= stmts
[j
];
1393 gimple_set_plf (stmt
, GF_PLF_1
, false);
1395 for (j
= 0; j
< group_size
; ++j
)
1397 gimple
*stmt
= stmts
[j
];
1398 if (matches
[j
] == !swap_not_matching
)
1400 /* Avoid swapping operands twice. */
1401 if (gimple_plf (stmt
, GF_PLF_1
))
1403 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1404 gimple_assign_rhs2_ptr (stmt
));
1405 gimple_set_plf (stmt
, GF_PLF_1
, true);
1408 /* Verify we swap all duplicates or none. */
1410 for (j
= 0; j
< group_size
; ++j
)
1412 gimple
*stmt
= stmts
[j
];
1413 gcc_assert (gimple_plf (stmt
, GF_PLF_1
)
1414 == (matches
[j
] == !swap_not_matching
));
1417 /* If we have all children of child built up from scalars then
1418 just throw that away and build it up this node from scalars. */
1419 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1420 /* ??? Rejecting patterns this way doesn't work. We'd have
1421 to do extra work to cancel the pattern so the uses see the
1423 && !is_pattern_stmt_p
1424 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1427 slp_tree grandchild
;
1429 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1430 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1435 this_loads
.truncate (old_nloads
);
1436 this_tree_size
= old_tree_size
;
1437 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1438 vect_free_slp_tree (grandchild
);
1439 SLP_TREE_CHILDREN (child
).truncate (0);
1441 dump_printf_loc (MSG_NOTE
, vect_location
,
1442 "Building parent vector operands from "
1443 "scalars instead\n");
1444 oprnd_info
->def_stmts
= vNULL
;
1445 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1446 children
.safe_push (child
);
1451 oprnd_info
->def_stmts
= vNULL
;
1452 children
.safe_push (child
);
1460 gcc_assert (child
== NULL
);
1461 FOR_EACH_VEC_ELT (children
, j
, child
)
1462 vect_free_slp_tree (child
);
1463 vect_free_oprnd_info (oprnds_info
);
1467 vect_free_oprnd_info (oprnds_info
);
1470 *tree_size
+= this_tree_size
;
1471 *max_nunits
= this_max_nunits
;
1472 loads
->safe_splice (this_loads
);
1474 node
= vect_create_new_slp_node (stmts
);
1475 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1476 SLP_TREE_CHILDREN (node
).splice (children
);
1480 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1483 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1489 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1490 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1491 ? " (external)" : "");
1492 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1494 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1495 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1497 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1498 vect_print_slp_tree (dump_kind
, loc
, child
);
1502 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1503 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1504 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1505 stmts in NODE are to be marked. */
1508 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1514 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1517 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1518 if (j
< 0 || i
== j
)
1519 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1521 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1522 vect_mark_slp_stmts (child
, mark
, j
);
1526 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1529 vect_mark_slp_stmts_relevant (slp_tree node
)
1533 stmt_vec_info stmt_info
;
1536 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1539 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1541 stmt_info
= vinfo_for_stmt (stmt
);
1542 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1543 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1544 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1547 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1548 vect_mark_slp_stmts_relevant (child
);
1552 /* Rearrange the statements of NODE according to PERMUTATION. */
1555 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1556 vec
<unsigned> permutation
)
1559 vec
<gimple
*> tmp_stmts
;
1563 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1564 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1566 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1567 tmp_stmts
.create (group_size
);
1568 tmp_stmts
.quick_grow_cleared (group_size
);
1570 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1571 tmp_stmts
[permutation
[i
]] = stmt
;
1573 SLP_TREE_SCALAR_STMTS (node
).release ();
1574 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1578 /* Attempt to reorder stmts in a reduction chain so that we don't
1579 require any load permutation. Return true if that was possible,
1580 otherwise return false. */
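
/* For illustration (example chosen for exposition): if the single load
   node of a reduction instance has load permutation { 1, 0, 3, 2 }, the
   scalar stmts of every node in the instance are rearranged using that
   permutation, after which the loads appear in order and the permutation
   can be dropped.  The order of the non-load stmts does not matter
   because only the final reduction value is used.  */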
1583 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1585 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1588 slp_tree node
, load
;
1590 /* Compare all the permutation sequences to the first one. We know
1591 that at least one load is permuted. */
1592 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1593 if (!node
->load_permutation
.exists ())
1595 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1597 if (!load
->load_permutation
.exists ())
1599 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1600 if (lidx
!= node
->load_permutation
[j
])
1604 /* Check that the loads in the first sequence are different and there
1605 are no gaps between them. */
1606 auto_sbitmap
load_index (group_size
);
1607 bitmap_clear (load_index
);
1608 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1610 if (lidx
>= group_size
)
1612 if (bitmap_bit_p (load_index
, lidx
))
1615 bitmap_set_bit (load_index
, lidx
);
1617 for (i
= 0; i
< group_size
; i
++)
1618 if (!bitmap_bit_p (load_index
, i
))
1621 /* This permutation is valid for reduction. Since the order of the
1622 statements in the nodes is not important unless they are memory
1623 accesses, we can rearrange the statements in all the nodes
1624 according to the order of the loads. */
1625 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1626 node
->load_permutation
);
1628 /* We are done, no actual permutations need to be generated. */
1629 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1630 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1632 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1633 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1634 /* But we have to keep those permutations that are required because
1635 of handling of gaps. */
1636 if (known_eq (unrolling_factor
, 1U)
1637 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1638 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1639 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1641 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1642 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1648 /* Check if the required load permutations in the SLP instance
1649 SLP_INSTN are supported. */
1652 vect_supported_load_permutation_p (slp_instance slp_instn
)
1654 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1655 unsigned int i
, j
, k
, next
;
1657 gimple
*stmt
, *load
, *next_load
;
1659 if (dump_enabled_p ())
1661 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1662 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1663 if (node
->load_permutation
.exists ())
1664 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1665 dump_printf (MSG_NOTE
, "%d ", next
);
1667 for (k
= 0; k
< group_size
; ++k
)
1668 dump_printf (MSG_NOTE
, "%d ", k
);
1669 dump_printf (MSG_NOTE
, "\n");
  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */
1679 /* Check that all the load nodes are of the same size. */
1680 /* ??? Can't we assert this? */
1681 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1682 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1685 node
= SLP_INSTANCE_TREE (slp_instn
);
1686 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1688 /* Reduction (there are no data-refs in the root).
1689 In reduction chain the order of the loads is not important. */
1690 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1691 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1692 vect_attempt_slp_rearrange_stmts (slp_instn
);
  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
1697 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1699 /* Check whether the loads in an instance form a subchain and thus
1700 no permutation is necessary. */
1701 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1703 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1705 bool subchain_p
= true;
1707 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1710 && (next_load
!= load
1711 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1716 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1719 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1722 stmt_vec_info group_info
1723 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1724 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1725 unsigned HOST_WIDE_INT nunits
;
1726 unsigned k
, maxk
= 0;
1727 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1730 /* In BB vectorization we may not actually use a loaded vector
1731 accessing elements in excess of GROUP_SIZE. */
1732 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1733 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1734 || maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1736 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1737 "BB vectorization with gaps at the end of "
1738 "a load is not supported\n");
1742 /* Verify the permutation can be generated. */
1745 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1746 1, slp_instn
, true, &n_perms
))
1748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1750 "unsupported load permutation\n");
1758 /* For loop vectorization verify we can generate the permutation. Be
1759 conservative about the vectorization factor, there are permutations
1760 that will use three vector inputs only starting from a specific factor
1761 and the vectorization factor is not yet final.
1762 ??? The SLP instance unrolling factor might not be the maximum one. */
1765 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1766 LOOP_VINFO_VECT_FACTOR
1767 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
1768 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1769 if (node
->load_permutation
.exists ()
1770 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1771 slp_instn
, true, &n_perms
))
1778 /* Find the last store in SLP INSTANCE. */
1781 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1783 gimple
*last
= NULL
, *stmt
;
1785 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1787 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1788 if (is_pattern_stmt_p (stmt_vinfo
))
1789 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1791 last
= get_later_stmt (stmt
, last
);
1797 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1800 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1801 stmt_vector_for_cost
*prologue_cost_vec
,
1802 stmt_vector_for_cost
*body_cost_vec
,
1803 unsigned ncopies_for_cost
,
1804 scalar_stmts_set_t
* visited
)
1809 stmt_vec_info stmt_info
;
1812 /* If we already costed the exact same set of scalar stmts we're done.
1813 We share the generated vector stmts for those. */
1814 if (visited
->contains (SLP_TREE_SCALAR_STMTS (node
)))
1817 visited
->add (SLP_TREE_SCALAR_STMTS (node
).copy ());
1819 /* Recurse down the SLP tree. */
1820 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1821 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1822 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1823 body_cost_vec
, ncopies_for_cost
, visited
);
1825 /* Look at the first scalar stmt to determine the cost. */
1826 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1827 stmt_info
= vinfo_for_stmt (stmt
);
1828 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1830 vect_memory_access_type memory_access_type
1831 = (STMT_VINFO_STRIDED_P (stmt_info
)
1834 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1835 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1836 memory_access_type
, VLS_STORE
,
1837 node
, prologue_cost_vec
, body_cost_vec
);
1840 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1841 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1843 /* If the load is permuted then the alignment is determined by
1844 the first group element not by the first scalar stmt DR. */
1845 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1846 stmt_info
= vinfo_for_stmt (stmt
);
1847 /* Record the cost for the permutation. */
1849 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1850 ncopies_for_cost
, instance
, true,
1852 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1853 stmt_info
, 0, vect_body
);
1854 unsigned assumed_nunits
1855 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info
));
1856 /* And adjust the number of loads performed. This handles
1857 redundancies as well as loads that are later dead. */
1858 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1859 bitmap_clear (perm
);
1860 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1861 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1862 ncopies_for_cost
= 0;
1863 bool load_seen
= false;
1864 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1866 if (i
% assumed_nunits
== 0)
1872 if (bitmap_bit_p (perm
, i
))
1877 gcc_assert (ncopies_for_cost
1878 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1879 + assumed_nunits
- 1) / assumed_nunits
);
1880 poly_uint64 uf
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1881 ncopies_for_cost
*= estimated_poly_value (uf
);
1883 /* Record the cost for the vector loads. */
1884 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1885 memory_access_type
, node
, prologue_cost_vec
,
1890 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1892 /* ncopies_for_cost is the number of IVs we generate. */
1893 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1894 stmt_info
, 0, vect_body
);
1896 /* Prologue cost for the initial values and step vector. */
1897 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1899 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1901 ? vector_load
: vec_construct
,
1902 stmt_info
, 0, vect_prologue
);
1903 record_stmt_cost (prologue_cost_vec
, 1,
1905 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1906 ? vector_load
: vec_construct
,
1907 stmt_info
, 0, vect_prologue
);
      /* ???  No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
1914 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1915 stmt_info
, 0, vect_body
);
1916 if (SLP_TREE_TWO_OPERATORS (node
))
1918 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1919 stmt_info
, 0, vect_body
);
1920 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1921 stmt_info
, 0, vect_body
);
1925 /* Push SLP node def-type to stmts. */
1926 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1927 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1928 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1929 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-done.  */
1934 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1935 lhs
= gimple_get_lhs (stmt
);
1936 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1938 tree op
= gimple_op (stmt
, i
);
1940 enum vect_def_type dt
;
1941 if (!op
|| op
== lhs
)
1943 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
)
1944 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
1946 /* Without looking at the actual initializer a vector of
1947 constants can be implemented as load from the constant pool.
1948 When all elements are the same we can use a splat. */
1949 tree vectype
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1950 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
1951 unsigned num_vects_to_check
;
1952 unsigned HOST_WIDE_INT const_nunits
;
1953 unsigned nelt_limit
;
1954 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
1955 && ! multiple_p (const_nunits
, group_size
))
1957 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
1958 nelt_limit
= const_nunits
;
1962 /* If either the vector has variable length or the vectors
1963 are composed of repeated whole groups we only need to
1964 cost construction once. All vectors will be the same. */
1965 num_vects_to_check
= 1;
1966 nelt_limit
= group_size
;
1968 tree elt
= NULL_TREE
;
1970 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
1972 unsigned si
= j
% group_size
;
1974 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
], i
);
1975 /* ??? We're just tracking whether all operands of a single
1976 vector initializer are the same, ideally we'd check if
1977 we emitted the same one already. */
1978 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
], i
))
1981 if (nelt
== nelt_limit
)
1983 /* ??? We need to pass down stmt_info for a vector type
1984 even if it points to the wrong stmt. */
1985 record_stmt_cost (prologue_cost_vec
, 1,
1986 dt
== vect_external_def
1987 ? (elt
? scalar_to_vec
: vec_construct
)
1989 stmt_info
, 0, vect_prologue
);
1996 /* Restore stmt def-types. */
1997 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1998 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1999 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
2000 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
2003 /* Compute the cost for the SLP instance INSTANCE. */
2006 vect_analyze_slp_cost (slp_instance instance
, void *data
, scalar_stmts_set_t
*visited
)
2008 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
2009 unsigned ncopies_for_cost
;
2010 stmt_info_for_cost
*si
;
  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
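  /* For example (illustrative numbers): with GROUP_SIZE == 8, an assumed
     vectorization factor of 2 and vectors of 4 elements this gives
     least_common_multiple (4, 8 * 2) / 4 == 4 copies.  */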
2016 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
2017 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2018 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
2019 /* Get the estimated vectorization factor, which is always one for
2020 basic-block vectorization. */
2021 unsigned int assumed_vf
;
2022 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
2023 assumed_vf
= vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info
));
2026 /* For reductions look at a reduction operand in case the reduction
2027 operation is widening like DOT_PROD or SAD. */
2028 tree vectype_for_cost
= STMT_VINFO_VECTYPE (stmt_info
);
2029 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2031 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2032 switch (gimple_assign_rhs_code (stmt
))
2036 vectype_for_cost
= get_vectype_for_scalar_type
2037 (TREE_TYPE (gimple_assign_rhs1 (stmt
)));
2042 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype_for_cost
);
2043 ncopies_for_cost
= (least_common_multiple (assumed_nunits
,
2044 group_size
* assumed_vf
)
2047 prologue_cost_vec
.create (10);
2048 body_cost_vec
.create (10);
2049 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
2050 &prologue_cost_vec
, &body_cost_vec
,
2051 ncopies_for_cost
, visited
);
2053 /* Record the prologue costs, which were delayed until we were
2054 sure that SLP was successful. */
2055 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
2057 struct _stmt_vec_info
*stmt_info
2058 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
2059 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
2060 si
->misalign
, vect_prologue
);
2063 /* Record the instance's instructions in the target cost model. */
2064 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
2066 struct _stmt_vec_info
*stmt_info
2067 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
2068 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
2069 si
->misalign
, vect_body
);
2072 prologue_cost_vec
.release ();
2073 body_cost_vec
.release ();
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */
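/* For illustration (hypothetical sizes): splitting a group of 7 stores with
   GROUP1_SIZE == 4 leaves a second group of 3.  The code below unlinks the
   4th stmt from its successor, gives the new group a GROUP_GAP equal to the
   original gap plus the 4 stmts now in front of it, and grows the first
   group's GROUP_GAP by 3 so that it still skips the whole original group.  */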
2083 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
2085 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
2086 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
2087 gcc_assert (group1_size
> 0);
2088 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
2089 gcc_assert (group2_size
> 0);
2090 GROUP_SIZE (first_vinfo
) = group1_size
;
2092 gimple
*stmt
= first_stmt
;
2093 for (unsigned i
= group1_size
; i
> 1; i
--)
2095 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2096 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
2098 /* STMT is now the last element of the first group. */
2099 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2100 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
2102 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
2103 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
2105 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
2106 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
2109 /* For the second group, the GROUP_GAP is that before the original group,
2110 plus skipping over the first vector. */
2111 GROUP_GAP (vinfo_for_stmt (group2
)) =
2112 GROUP_GAP (first_vinfo
) + group1_size
;
2114 /* GROUP_GAP of the first group now has to skip over the second group too. */
2115 GROUP_GAP (first_vinfo
) += group2_size
;
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
2119 group1_size
, group2_size
);
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */
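/* E.g. (illustrative numbers): with NUNITS == 4 and GROUP_SIZE == 6 the
   common multiple is 12, so the unrolling factor is 12 / 6 == 2; when
   GROUP_SIZE is already a multiple of NUNITS the factor is 1.  */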
2128 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
2130 return exact_div (common_multiple (nunits
, group_size
), group_size
);
2133 /* Analyze an SLP instance starting from a group of grouped stores. Call
2134 vect_build_slp_tree to build a tree of packed stmts if possible.
2135 Return FALSE if it's impossible to SLP any stmt in the loop. */
2138 vect_analyze_slp_instance (vec_info
*vinfo
,
2139 gimple
*stmt
, unsigned max_tree_size
)
2141 slp_instance new_instance
;
2143 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2144 tree vectype
, scalar_type
= NULL_TREE
;
2147 vec
<slp_tree
> loads
;
2148 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
2149 vec
<gimple
*> scalar_stmts
;
2151 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2155 scalar_type
= TREE_TYPE (DR_REF (dr
));
2156 vectype
= get_vectype_for_scalar_type (scalar_type
);
2160 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2161 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2164 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2168 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2169 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2170 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2175 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2178 "Build SLP failed: unsupported data-type ");
2179 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
2180 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2185 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2187 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2188 scalar_stmts
.create (group_size
);
2190 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2192 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2195 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
2196 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2197 scalar_stmts
.safe_push (
2198 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2200 scalar_stmts
.safe_push (next
);
2201 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2203 /* Mark the first element of the reduction chain as reduction to properly
2204 transform the node. In the reduction analysis phase only the last
2205 element of the chain is marked as reduction. */
2206 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2207 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2211 /* Collect reduction statements. */
2212 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2213 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2214 scalar_stmts
.safe_push (next
);
2217 loads
.create (group_size
);
2219 /* Build the tree for the SLP instance. */
2220 bool *matches
= XALLOCAVEC (bool, group_size
);
2221 unsigned npermutes
= 0;
2222 bst_fail
= new scalar_stmts_set_t ();
2223 poly_uint64 max_nunits
= nunits
;
2224 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2225 &max_nunits
, &loads
, matches
, &npermutes
,
2226 NULL
, max_tree_size
);
2230 /* Calculate the unrolling factor based on the smallest type. */
2231 poly_uint64 unrolling_factor
2232 = calculate_unrolling_factor (max_nunits
, group_size
);
2234 if (maybe_ne (unrolling_factor
, 1U)
2235 && is_a
<bb_vec_info
> (vinfo
))
2237 unsigned HOST_WIDE_INT const_max_nunits
;
2238 if (!max_nunits
.is_constant (&const_max_nunits
)
2239 || const_max_nunits
> group_size
)
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2243 "Build SLP failed: store group "
2244 "size not a multiple of the vector size "
2245 "in basic block SLP\n");
2246 vect_free_slp_tree (node
);
2250 /* Fatal mismatch. */
2251 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2252 vect_free_slp_tree (node
);
2257 /* Create a new SLP instance. */
2258 new_instance
= XNEW (struct _slp_instance
);
2259 SLP_INSTANCE_TREE (new_instance
) = node
;
2260 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2261 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2262 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2264 /* Compute the load permutation. */
2266 bool loads_permuted
= false;
2267 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2269 vec
<unsigned> load_permutation
;
2271 gimple
*load
, *first_stmt
;
2272 bool this_load_permuted
= false;
2273 load_permutation
.create (group_size
);
2274 first_stmt
= GROUP_FIRST_ELEMENT
2275 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2276 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2278 int load_place
= vect_get_place_in_interleaving_chain
2280 gcc_assert (load_place
!= -1);
2281 if (load_place
!= j
)
2282 this_load_permuted
= true;
2283 load_permutation
.safe_push (load_place
);
2285 if (!this_load_permuted
2286 /* The load requires permutation when unrolling exposes
2287 a gap either because the group is larger than the SLP
2288 group-size or because there is a gap between the groups. */
2289 && (known_eq (unrolling_factor
, 1U)
2290 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2291 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2293 load_permutation
.release ();
2296 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2297 loads_permuted
= true;
2302 if (!vect_supported_load_permutation_p (new_instance
))
2304 if (dump_enabled_p ())
2306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2307 "Build SLP failed: unsupported load "
2309 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2312 vect_free_slp_instance (new_instance
);
  /* If the loads and stores can be handled with load/store-lanes
     instructions do not generate this SLP instance.  */
2319 if (is_a
<loop_vec_info
> (vinfo
)
2321 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2324 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2326 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2327 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2328 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
	  /* Use SLP for strided accesses (or if we can't use
	     load-lanes).  */
2331 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2332 || ! vect_load_lanes_supported
2333 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2334 GROUP_SIZE (stmt_vinfo
), false))
2337 if (i
== loads
.length ())
2339 if (dump_enabled_p ())
2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2341 "Built SLP cancelled: can use "
2342 "load/store-lanes\n");
2343 vect_free_slp_instance (new_instance
);
2348 vinfo
->slp_instances
.safe_push (new_instance
);
2350 if (dump_enabled_p ())
2352 dump_printf_loc (MSG_NOTE
, vect_location
,
2353 "Final SLP tree for instance:\n");
2354 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2362 /* Failed to SLP. */
2363 /* Free the allocated memory. */
2364 scalar_stmts
.release ();
  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
2370 unsigned HOST_WIDE_INT const_nunits
;
2371 if (is_a
<bb_vec_info
> (vinfo
)
2372 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2373 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2374 && nunits
.is_constant (&const_nunits
))
      /* We consider breaking the group only on VF boundaries from the
	 existing start.  */
2378 for (i
= 0; i
< group_size
; i
++)
2379 if (!matches
[i
]) break;
2381 if (i
>= const_nunits
&& i
< group_size
)
2383 /* Split into two groups at the first vector boundary before i. */
2384 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2385 unsigned group1_size
= i
& ~(const_nunits
- 1);
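	  /* E.g. (illustrative values): with const_nunits == 4 and the first
	     mismatch at i == 6, group1_size becomes 6 & ~3 == 4, so the
	     first four stores are retried as their own SLP group.  */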
2387 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2388 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2389 /* If the first non-match was in the middle of a vector,
2390 skip the rest of that vector. */
2391 if (group1_size
< i
)
2393 i
= group1_size
+ const_nunits
;
2395 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2398 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2401 /* Even though the first vector did not all match, we might be able to SLP
2402 (some) of the remainder. FORNOW ignore this possibility. */
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
2413 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2416 gimple
*first_element
;
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2421 /* Find SLP sequences starting from groups of grouped stores. */
2422 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2423 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2425 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2427 if (loop_vinfo
->reduction_chains
.length () > 0)
2429 /* Find SLP sequences starting from reduction chains. */
2430 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2431 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2434 /* Dissolve reduction chain group. */
2435 gimple
*next
, *stmt
= first_element
;
2438 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2439 next
= GROUP_NEXT_ELEMENT (vinfo
);
2440 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2441 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2444 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2445 = vect_internal_def
;
2449 /* Find SLP sequences starting from groups of reductions. */
2450 if (loop_vinfo
->reductions
.length () > 1)
2451 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2459 /* For each possible SLP instance decide whether to SLP it and calculate overall
2460 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2461 least one instance. */
2464 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2467 poly_uint64 unrolling_factor
= 1;
2468 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2469 slp_instance instance
;
2470 int decided_to_slp
= 0;
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2476 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2478 /* FORNOW: SLP if you can. */
2479 /* All unroll factors have the form current_vector_size * X for some
2480 rational X, so they must have a common multiple. */
2482 = force_common_multiple (unrolling_factor
,
2483 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2485 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2486 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2487 loop-based vectorization. Such stmts will be marked as HYBRID. */
2488 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2492 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2494 if (decided_to_slp
&& dump_enabled_p ())
2496 dump_printf_loc (MSG_NOTE
, vect_location
,
2497 "Decided to SLP %d instances. Unrolling factor ",
2499 dump_dec (MSG_NOTE
, unrolling_factor
);
2500 dump_printf (MSG_NOTE
, "\n");
2503 return (decided_to_slp
> 0);
2507 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2508 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2511 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2513 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2514 imm_use_iterator imm_iter
;
2516 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2518 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2519 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2522 /* Propagate hybrid down the SLP tree. */
2523 if (stype
== hybrid
)
2525 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2529 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2530 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2531 /* If we get a pattern stmt here we have to use the LHS of the
2532 original stmt for immediate uses. */
2533 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2534 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2535 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2537 if (gimple_code (stmt
) == GIMPLE_PHI
)
2538 def
= gimple_phi_result (stmt
);
2540 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2542 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2544 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2546 use_vinfo
= vinfo_for_stmt (use_stmt
);
2547 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2548 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2549 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2550 if (!STMT_SLP_TYPE (use_vinfo
)
2551 && (STMT_VINFO_RELEVANT (use_vinfo
)
2552 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2553 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2554 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2556 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2559 "def in non-SLP stmt: ");
2560 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2568 && !HYBRID_SLP_STMT (stmt_vinfo
))
2570 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2573 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2575 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2578 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2579 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2580 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2583 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2586 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2588 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2589 struct loop
*loopp
= (struct loop
*)wi
->info
;
2594 if (TREE_CODE (*tp
) == SSA_NAME
2595 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2597 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2598 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2599 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2601 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2604 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2606 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2614 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2617 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2618 /* If the stmt is in a SLP instance then this isn't a reason
2619 to mark use definitions in other SLP instances as hybrid. */
2620 if (! STMT_SLP_TYPE (use_vinfo
)
2621 && (STMT_VINFO_RELEVANT (use_vinfo
)
2622 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2623 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2624 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2631 /* Find stmts that must be both vectorized and SLPed. */
2634 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2637 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2638 slp_instance instance
;
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
2646 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2648 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2649 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2652 gimple
*stmt
= gsi_stmt (gsi
);
2653 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2654 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2657 memset (&wi
, 0, sizeof (wi
));
2658 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2659 gimple_stmt_iterator gsi2
2660 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2661 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2662 vect_detect_hybrid_slp_1
, &wi
);
2663 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2664 vect_detect_hybrid_slp_2
,
2665 vect_detect_hybrid_slp_1
, &wi
);
2670 /* Then walk the SLP instance trees marking stmts with uses in
2671 non-SLP stmts as hybrid, also propagating hybrid down the
2672 SLP tree, collecting the above info on-the-fly. */
2673 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2675 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2676 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2682 /* Initialize a bb_vec_info struct for the statements between
2683 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2685 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2686 gimple_stmt_iterator region_end_in
)
2687 : vec_info (vec_info::bb
, init_cost (NULL
)),
2688 bb (gsi_bb (region_begin_in
)),
2689 region_begin (region_begin_in
),
2690 region_end (region_end_in
)
2692 gimple_stmt_iterator gsi
;
2694 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2697 gimple
*stmt
= gsi_stmt (gsi
);
2698 gimple_set_uid (stmt
, 0);
2699 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2706 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2707 stmts in the basic block. */
2709 _bb_vec_info::~_bb_vec_info ()
2711 for (gimple_stmt_iterator si
= region_begin
;
2712 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2714 gimple
*stmt
= gsi_stmt (si
);
2715 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2718 /* Free stmt_vec_info. */
2719 free_stmt_vec_info (stmt
);
2721 /* Reset region marker. */
2722 gimple_set_uid (stmt
, -1);
2729 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2730 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2732 Return true if the operations are supported. */
2735 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2736 slp_instance node_instance
)
2743 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2746 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2747 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
))
2750 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2751 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2752 gcc_assert (stmt_info
);
2753 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2755 /* For BB vectorization vector types are assigned here.
2756 Memory accesses already got their vector type assigned
2757 in vect_analyze_data_refs. */
2758 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2760 && ! STMT_VINFO_DATA_REF (stmt_info
))
2762 gcc_assert (PURE_SLP_STMT (stmt_info
));
2764 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2765 if (dump_enabled_p ())
2767 dump_printf_loc (MSG_NOTE
, vect_location
,
2768 "get vectype for scalar type: ");
2769 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2770 dump_printf (MSG_NOTE
, "\n");
2773 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2776 if (dump_enabled_p ())
2778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2779 "not SLPed: unsupported data-type ");
2780 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2782 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2787 if (dump_enabled_p ())
2789 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2790 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2791 dump_printf (MSG_NOTE
, "\n");
2795 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2796 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
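  /* E.g. (illustrative numbers): GROUP_SIZE == 4 in a loop with VF == 4
     and a vector type holding 8 elements needs
     vect_get_num_vectors (4 * 4, vectype) == 2 vector stmts per group.  */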
2805 if (GROUP_FIRST_ELEMENT (stmt_info
)
2806 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2807 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2808 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2812 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2813 vf
= loop_vinfo
->vectorization_factor
;
2816 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2817 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2818 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2819 = vect_get_num_vectors (vf
* group_size
, vectype
);
2822 /* Push SLP node def-type to stmt operands. */
2823 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2824 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2825 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2826 = SLP_TREE_DEF_TYPE (child
);
2827 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2828 /* Restore def-types. */
2829 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2830 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2831 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2832 = vect_internal_def
;
2840 /* Analyze statements in SLP instances of VINFO. Return true if the
2841 operations are supported. */
2844 vect_slp_analyze_operations (vec_info
*vinfo
)
2846 slp_instance instance
;
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_NOTE
, vect_location
,
2851 "=== vect_slp_analyze_operations ===\n");
2853 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2855 if (!vect_slp_analyze_node_operations (vinfo
,
2856 SLP_INSTANCE_TREE (instance
),
2859 dump_printf_loc (MSG_NOTE
, vect_location
,
2860 "removing SLP instance operations starting from: ");
2861 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2862 SLP_TREE_SCALAR_STMTS
2863 (SLP_INSTANCE_TREE (instance
))[0], 0);
2864 vect_free_slp_instance (instance
);
2865 vinfo
->slp_instances
.ordered_remove (i
);
2871 if (dump_enabled_p ())
2872 dump_printf_loc (MSG_NOTE
, vect_location
,
2873 "=== vect_analyze_slp_cost ===\n");
2875 /* Compute the costs of the SLP instances. */
2876 scalar_stmts_set_t
*visited
= new scalar_stmts_set_t ();
2877 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
2878 vect_analyze_slp_cost (instance
, vinfo
->target_cost_data
, visited
);
2881 return !vinfo
->slp_instances
.is_empty ();
2885 /* Compute the scalar cost of the SLP node NODE and its children
2886 and return it. Do not account defs that are marked in LIFE and
2887 update LIFE according to uses of NODE. */
2890 vect_bb_slp_scalar_cost (basic_block bb
,
2891 slp_tree node
, vec
<bool, va_heap
> *life
)
2893 unsigned scalar_cost
= 0;
2898 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2901 ssa_op_iter op_iter
;
2902 def_operand_p def_p
;
2903 stmt_vec_info stmt_info
;
      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
2913 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2915 imm_use_iterator use_iter
;
2917 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2918 if (!is_gimple_debug (use_stmt
)
2919 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2921 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2924 BREAK_FROM_IMM_USE_STMT (use_iter
);
2930 /* Count scalar stmts only once. */
2931 if (gimple_visited_p (stmt
))
2933 gimple_set_visited (stmt
, true);
2935 stmt_info
= vinfo_for_stmt (stmt
);
2936 if (STMT_VINFO_DATA_REF (stmt_info
))
2938 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2939 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2941 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2944 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2946 scalar_cost
+= stmt_cost
;
2949 auto_vec
<bool, 20> subtree_life
;
2950 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2952 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2954 /* Do not directly pass LIFE to the recursive call, copy it to
2955 confine changes in the callee to the current child/subtree. */
2956 subtree_life
.safe_splice (*life
);
2957 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
);
2958 subtree_life
.truncate (0);
2965 /* Check if vectorization of the basic block is profitable. */
2968 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2970 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2971 slp_instance instance
;
2973 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2974 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2976 /* Calculate scalar cost. */
2977 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2979 auto_vec
<bool, 20> life
;
2980 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2981 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2982 SLP_INSTANCE_TREE (instance
),
2986 /* Unset visited flag. */
2987 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2988 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2989 gimple_set_visited (gsi_stmt (gsi
), false);
2991 /* Complete the target-specific cost calculation. */
2992 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2993 &vec_inside_cost
, &vec_epilogue_cost
);
2995 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2997 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
3000 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
3002 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
3003 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
3004 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
  /* Vectorization is profitable if its cost does not exceed the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side, for
     example).  */
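  /* E.g. (illustrative numbers): a prologue cost of 2 plus a body cost of 6
     against a scalar cost of 8 is still considered profitable by the check
     below, because equal cost is resolved in favour of the vector side.  */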
3012 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
3018 /* Check if the basic block can be vectorized. Returns a bb_vec_info
3019 if so and sets fatal to true if failure is independent of
3020 current_vector_size. */
3023 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
3024 gimple_stmt_iterator region_end
,
3025 vec
<data_reference_p
> datarefs
, int n_stmts
,
3028 bb_vec_info bb_vinfo
;
3029 slp_instance instance
;
3031 poly_uint64 min_vf
= 2;
3033 /* The first group of checks is independent of the vector size. */
3036 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
3038 if (dump_enabled_p ())
3039 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3040 "not vectorized: too many instructions in "
3042 free_data_refs (datarefs
);
3046 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
3050 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3052 /* Analyze the data references. */
3054 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
3056 if (dump_enabled_p ())
3057 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3058 "not vectorized: unhandled data-ref in basic "
3065 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3067 if (dump_enabled_p ())
3068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3069 "not vectorized: not enough data-refs in "
3076 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3078 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3080 "not vectorized: unhandled data access in "
  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
3090 if (bb_vinfo
->grouped_stores
.is_empty ())
3092 if (dump_enabled_p ())
3093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3094 "not vectorized: no grouped stores in "
  /* The earlier checks were independent of the vector size; the rest of
     the analysis below depends on it in some way.  */
3104 vect_pattern_recog (bb_vinfo
);
3106 /* Check the SLP opportunities in the basic block, analyze and build SLP
3108 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3110 if (dump_enabled_p ())
3112 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3113 "Failed to SLP the basic block.\n");
3114 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3115 "not vectorized: failed to find SLP opportunities "
3116 "in basic block.\n");
3123 vect_record_base_alignments (bb_vinfo
);
3125 /* Analyze and verify the alignment of data references and the
3126 dependence in the SLP instances. */
3127 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3129 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
3130 || ! vect_slp_analyze_instance_dependence (instance
))
3132 dump_printf_loc (MSG_NOTE
, vect_location
,
3133 "removing SLP instance operations starting from: ");
3134 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
3135 SLP_TREE_SCALAR_STMTS
3136 (SLP_INSTANCE_TREE (instance
))[0], 0);
3137 vect_free_slp_instance (instance
);
3138 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3142 /* Mark all the statements that we want to vectorize as pure SLP and
3144 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
3145 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3149 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3155 if (!vect_slp_analyze_operations (bb_vinfo
))
3157 if (dump_enabled_p ())
3158 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3159 "not vectorized: bad operation in basic block.\n");
3165 /* Cost model: check if the vectorization is worthwhile. */
3166 if (!unlimited_cost_model (NULL
)
3167 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3169 if (dump_enabled_p ())
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3171 "not vectorized: vectorization is not "
3178 if (dump_enabled_p ())
3179 dump_printf_loc (MSG_NOTE
, vect_location
,
3180 "Basic block will be vectorized using SLP\n");
3186 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3187 true if anything in the basic-block was vectorized. */
3190 vect_slp_bb (basic_block bb
)
3192 bb_vec_info bb_vinfo
;
3193 gimple_stmt_iterator gsi
;
3194 bool any_vectorized
= false;
3195 auto_vector_sizes vector_sizes
;
3197 if (dump_enabled_p ())
3198 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
3200 /* Autodetect first vector size we try. */
3201 current_vector_size
= 0;
3202 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3203 unsigned int next_size
= 0;
3205 gsi
= gsi_start_bb (bb
);
3207 poly_uint64 autodetected_vector_size
= 0;
3210 if (gsi_end_p (gsi
))
3213 gimple_stmt_iterator region_begin
= gsi
;
3214 vec
<data_reference_p
> datarefs
= vNULL
;
3217 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3219 gimple
*stmt
= gsi_stmt (gsi
);
3220 if (is_gimple_debug (stmt
))
3224 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3225 vect_location
= gimple_location (stmt
);
3227 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
3231 /* Skip leading unhandled stmts. */
3232 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3238 gimple_stmt_iterator region_end
= gsi
;
3240 bool vectorized
= false;
3242 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3243 datarefs
, insns
, fatal
);
3245 && dbg_cnt (vect_slp
))
3247 if (dump_enabled_p ())
3248 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3250 vect_schedule_slp (bb_vinfo
);
3252 if (dump_enabled_p ())
3253 dump_printf_loc (MSG_NOTE
, vect_location
,
3254 "basic block part vectorized\n");
3260 any_vectorized
|= vectorized
;
3263 autodetected_vector_size
= current_vector_size
;
3265 if (next_size
< vector_sizes
.length ()
3266 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3270 || next_size
== vector_sizes
.length ()
3271 || known_eq (current_vector_size
, 0U)
3272 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3273 vector sizes will fail do not bother iterating. */
3276 if (gsi_end_p (region_end
))
3279 /* Skip the unhandled stmt. */
3282 /* And reset vector sizes. */
3283 current_vector_size
= 0;
3288 /* Try the next biggest vector size. */
3289 current_vector_size
= vector_sizes
[next_size
++];
3290 if (dump_enabled_p ())
3292 dump_printf_loc (MSG_NOTE
, vect_location
,
3293 "***** Re-trying analysis with "
3295 dump_dec (MSG_NOTE
, current_vector_size
);
3296 dump_printf (MSG_NOTE
, "\n");
3304 return any_vectorized
;
/* Return 1 if the vector type of the boolean constant which is operand
   number OPNUM of statement STMT is a boolean vector.  */
3312 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3314 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3315 enum tree_code code
= gimple_expr_code (stmt
);
3318 enum vect_def_type dt
;
3320 /* For comparison and COND_EXPR type is chosen depending
3321 on the other comparison operand. */
3322 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3325 op
= gimple_assign_rhs1 (stmt
);
3327 op
= gimple_assign_rhs2 (stmt
);
3329 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3333 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3336 if (code
== COND_EXPR
)
3338 tree cond
= gimple_assign_rhs1 (stmt
);
3340 if (TREE_CODE (cond
) == SSA_NAME
)
3343 op
= TREE_OPERAND (cond
, 1);
3345 op
= TREE_OPERAND (cond
, 0);
3347 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3351 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3354 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */
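/* A small worked example (illustrative only): to repeat 4 QImode elements
   { a, b, c, d } across variable-length vectors, IM could be HImode.  The
   pairs { a, b } and { c, d } are each view-converted to one HImode value,
   each value is duplicated across a vector, and a single interleave of the
   two duplicates yields the byte pattern a b c d a b c d ..., which is then
   view-converted back to the requested vector type.  */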
3380 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3381 unsigned int nresults
, vec
<tree
> &results
)
3383 unsigned int nelts
= elts
.length ();
3384 tree element_type
= TREE_TYPE (vector_type
);
3386 /* (1) Find a vector mode VM with integer elements of mode IM. */
3387 unsigned int nvectors
= 1;
3388 tree new_vector_type
;
3390 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3391 &nvectors
, &new_vector_type
,
3395 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3396 unsigned int partial_nelts
= nelts
/ nvectors
;
3397 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3399 tree_vector_builder partial_elts
;
3400 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3401 pieces
.quick_grow (nvectors
* 2);
3402 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3404 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3405 ELTS' has mode IM. */
3406 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3407 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3408 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3409 tree t
= gimple_build_vector (seq
, &partial_elts
);
3410 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3411 TREE_TYPE (new_vector_type
), t
);
3413 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3414 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
	 correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

       out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
       out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
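  /* For illustration (hypothetical piece count): with nvectors == 4 and
     duplicated pieces P0..P3, the first pass interleaves P0 with P2 and
     P1 with P3, and the second pass interleaves those intermediate vectors,
     so the final vectors cycle through P0, P1, P2, P3 in element order,
     matching the original ELTS' layout.  */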
3427 unsigned int in_start
= 0;
3428 unsigned int out_start
= nvectors
;
3429 unsigned int hi_start
= nvectors
/ 2;
3430 /* A bound on the number of outputs needed to produce NRESULTS results
3431 in the final iteration. */
3432 unsigned int noutputs_bound
= nvectors
* nresults
;
3433 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3435 noutputs_bound
/= 2;
3436 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3437 for (unsigned int i
= 0; i
< limit
; ++i
)
3440 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3443 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3447 tree output
= make_ssa_name (new_vector_type
);
3448 tree input1
= pieces
[in_start
+ (i
/ 2)];
3449 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3450 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3453 gimple_seq_add_stmt (seq
, stmt
);
3454 pieces
[out_start
+ i
] = output
;
3456 std::swap (in_start
, out_start
);
3459 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3460 results
.reserve (nresults
);
3461 for (unsigned int i
= 0; i
< nresults
; ++i
)
3463 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3464 pieces
[in_start
+ i
]));
3466 results
.quick_push (results
[i
- nvectors
]);
3470 /* For constant and loop invariant defs of SLP_NODE this function returns
3471 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3472 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3473 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3474 REDUC_INDEX is the index of the reduction operand in the statements, unless
3478 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3479 vec
<tree
> *vec_oprnds
,
3480 unsigned int op_num
, unsigned int number_of_vectors
)
3482 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3483 gimple
*stmt
= stmts
[0];
3484 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3485 unsigned HOST_WIDE_INT nunits
;
3487 unsigned j
, number_of_places_left_in_vector
;
3490 int group_size
= stmts
.length ();
3491 unsigned int vec_num
, i
;
3492 unsigned number_of_copies
= 1;
3494 voprnds
.create (number_of_vectors
);
3495 bool constant_p
, is_store
;
3496 tree neutral_op
= NULL
;
3497 enum tree_code code
= gimple_expr_code (stmt
);
3498 gimple_seq ctor_seq
= NULL
;
3499 auto_vec
<tree
, 16> permute_results
;
3501 /* Check if vector type is a boolean vector. */
3502 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3503 && vect_mask_constant_operand_p (stmt
, op_num
))
3505 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3507 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3509 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3512 op
= gimple_assign_rhs1 (stmt
);
3519 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3520 created vectors. It is greater than 1 if unrolling is performed.
3522 For example, we have two scalar operands, s1 and s2 (e.g., group of
3523 strided accesses of size two), while NUNITS is four (i.e., four scalars
3524 of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     is 2.)
3528 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3529 containing the operands.
3531 For example, NUNITS is four as before, and the group size is 8
3532 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3533 {s5, s6, s7, s8}. */
3535 /* When using duplicate_and_interleave, we just need one element for
3536 each scalar statement. */
3537 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3538 nunits
= group_size
;
3540 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3542 number_of_places_left_in_vector
= nunits
;
3544 tree_vector_builder
elts (vector_type
, nunits
, 1);
3545 elts
.quick_grow (nunits
);
3546 bool place_after_defs
= false;
3547 for (j
= 0; j
< number_of_copies
; j
++)
3549 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3552 op
= gimple_assign_rhs1 (stmt
);
3559 tree cond
= gimple_assign_rhs1 (stmt
);
3560 if (TREE_CODE (cond
) == SSA_NAME
)
3561 op
= gimple_op (stmt
, op_num
+ 1);
3562 else if (op_num
== 0 || op_num
== 1)
3563 op
= TREE_OPERAND (cond
, op_num
);
3567 op
= gimple_assign_rhs2 (stmt
);
3569 op
= gimple_assign_rhs3 (stmt
);
3575 op
= gimple_call_arg (stmt
, op_num
);
3582 op
= gimple_op (stmt
, op_num
+ 1);
3583 /* Unlike the other binary operators, shifts/rotates have
3584 the shift count being int, instead of the same type as
3585 the lhs, so make sure the scalar is the right type if
3586 we are dealing with vectors of
3587 long long/long/short/char. */
3588 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3589 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3593 op
= gimple_op (stmt
, op_num
+ 1);
3598 /* Create 'vect_ = {op0,op1,...,opn}'. */
3599 number_of_places_left_in_vector
--;
3601 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3603 if (CONSTANT_CLASS_P (op
))
3605 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3607 /* Can't use VIEW_CONVERT_EXPR for booleans because
		     of possibly different sizes of the scalar value and the
		     vector element.  */
3610 if (integer_zerop (op
))
3611 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3612 else if (integer_onep (op
))
3613 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3618 op
= fold_unary (VIEW_CONVERT_EXPR
,
3619 TREE_TYPE (vector_type
), op
);
3620 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3624 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3626 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3629 = build_all_ones_cst (TREE_TYPE (vector_type
));
3631 = build_zero_cst (TREE_TYPE (vector_type
));
3632 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3633 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3639 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3642 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3645 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3649 elts
[number_of_places_left_in_vector
] = op
;
3650 if (!CONSTANT_CLASS_P (op
))
3652 if (TREE_CODE (orig_op
) == SSA_NAME
3653 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3654 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3655 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3656 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3657 place_after_defs
= true;
3659 if (number_of_places_left_in_vector
== 0)
3662 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3663 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3664 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3667 if (vec_oprnds
->is_empty ())
3668 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3671 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3674 gimple_stmt_iterator gsi
;
3675 if (place_after_defs
)
3678 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3679 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3682 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3683 if (ctor_seq
!= NULL
)
3685 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3686 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3689 voprnds
.quick_push (init
);
3690 place_after_defs
= false;
3691 number_of_places_left_in_vector
= nunits
;
3693 elts
.new_vector (vector_type
, nunits
, 1);
3694 elts
.quick_grow (nunits
);
  /* Since the vectors are created in the reverse order, we should invert
     them.  */
3701 vec_num
= voprnds
.length ();
3702 for (j
= vec_num
; j
!= 0; j
--)
3704 vop
= voprnds
[j
- 1];
3705 vec_oprnds
->quick_push (vop
);
3710 /* In case that VF is greater than the unrolling factor needed for the SLP
3711 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3712 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3713 to replicate the vectors. */
3714 while (number_of_vectors
> vec_oprnds
->length ())
3716 tree neutral_vec
= NULL
;
3721 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3723 vec_oprnds
->quick_push (neutral_vec
);
3727 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3728 vec_oprnds
->quick_push (vop
);
3734 /* Get vectorized definitions from SLP_NODE that contains corresponding
3735 vectorized def-stmts. */
3738 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3741 gimple
*vec_def_stmt
;
3744 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3746 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3748 gcc_assert (vec_def_stmt
);
3749 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3750 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3752 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3753 vec_oprnds
->quick_push (vec_oprnd
);
3758 /* Get vectorized definitions for SLP_NODE.
3759 If the scalar definitions are loop invariants or constants, collect them and
3760 call vect_get_constant_vectors() to create vector stmts.
3761 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3762 must be stored in the corresponding child of SLP_NODE, and we call
3763 vect_get_slp_vect_defs () to retrieve them. */
3766 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3767 vec
<vec
<tree
> > *vec_oprnds
)
3770 int number_of_vects
= 0, i
;
3771 unsigned int child_index
= 0;
3772 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3773 slp_tree child
= NULL
;
3776 bool vectorized_defs
;
3778 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3779 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3781 /* For each operand we check if it has vectorized definitions in a child
3782 node or we need to create them (for invariants and constants). We
3783 check if the LHS of the first stmt of the next child matches OPRND.
3784 If it does, we found the correct child. Otherwise, we call
3785 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3786 to check this child node for the next operand. */
3787 vectorized_defs
= false;
3788 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3790 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3792 /* We have to check both pattern and original def, if available. */
3793 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3795 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3797 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3800 if (gimple_code (first_def
) == GIMPLE_PHI
)
3801 first_def_op
= gimple_phi_result (first_def
);
3803 first_def_op
= gimple_get_lhs (first_def
);
3804 if (operand_equal_p (oprnd
, first_def_op
, 0)
3806 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3808 /* The number of vector defs is determined by the number of
3809 vector statements in the node from which we get those
3811 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3812 vectorized_defs
= true;
3820 if (!vectorized_defs
)
3824 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
	  /* Number of vector stmts was calculated according to LHS in
	     vect_schedule_slp_instance (), fix it by replacing LHS with
	     RHS, if necessary.  See vect_get_smallest_scalar_type () for
	     details.  */
3829 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3831 if (rhs_size_unit
!= lhs_size_unit
)
3833 number_of_vects
*= rhs_size_unit
;
3834 number_of_vects
/= lhs_size_unit
;
3839 /* Allocate memory for vectorized defs. */
3841 vec_defs
.create (number_of_vects
);
3843 /* For reduction defs we call vect_get_constant_vectors (), since we are
3844 looking for initial loop invariant values. */
3845 if (vectorized_defs
)
3846 /* The defs are already vectorized. */
3847 vect_get_slp_vect_defs (child
, &vec_defs
);
3849 /* Build vectors from scalar defs. */
3850 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3853 vec_oprnds
->quick_push (vec_defs
);
3857 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3858 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3859 permute statements for the SLP node NODE of the SLP instance
3860 SLP_NODE_INSTANCE. */
3863 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3864 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3865 slp_instance slp_node_instance
, bool analyze_only
,
3868 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3869 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3870 tree mask_element_type
= NULL_TREE
, mask_type
;
3872 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3873 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3874 unsigned int mask_element
;
3876 unsigned HOST_WIDE_INT nunits
, const_vf
;
3878 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3881 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3883 mode
= TYPE_MODE (vectype
);
3885 /* At the moment, all permutations are represented using per-element
3886 indices, so we can't cope with variable vector lengths or
3887 vectorization factors. */
3888 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3889 || !vf
.is_constant (&const_vf
))
3892 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3893 same size as the vector element being permuted. */
3894 mask_element_type
= lang_hooks
.types
.type_for_mode
3895 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3896 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3897 vec_perm_builder
mask (nunits
, nunits
, 1);
3898 mask
.quick_grow (nunits
);
3899 vec_perm_indices indices
;
3901 /* Initialize the vect stmts of NODE to properly insert the generated
3904 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3905 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3906 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
3926 int vect_stmts_counter
= 0;
3927 unsigned int index
= 0;
3928 int first_vec_index
= -1;
3929 int second_vec_index
= -1;
3933 for (unsigned int j
= 0; j
< const_vf
; j
++)
3935 for (int k
= 0; k
< group_size
; k
++)
3937 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3938 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3939 vec_index
= i
/ nunits
;
3940 mask_element
= i
% nunits
;
3941 if (vec_index
== first_vec_index
3942 || first_vec_index
== -1)
3944 first_vec_index
= vec_index
;
3946 else if (vec_index
== second_vec_index
3947 || second_vec_index
== -1)
3949 second_vec_index
= vec_index
;
3950 mask_element
+= nunits
;
3954 if (dump_enabled_p ())
3956 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3957 "permutation requires at "
3958 "least three vectors ");
3959 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3962 gcc_assert (analyze_only
);
3966 gcc_assert (mask_element
< 2 * nunits
);
3967 if (mask_element
!= index
)
3969 mask
[index
++] = mask_element
;
3971 if (index
== nunits
&& !noop_p
)
3973 indices
.new_vector (mask
, 2, nunits
);
3974 if (!can_vec_perm_const_p (mode
, indices
))
3976 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3980 "unsupported vect permute { ");
3981 for (i
= 0; i
< nunits
; ++i
)
3983 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3984 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3986 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3988 gcc_assert (analyze_only
);
3995 if (index
== nunits
)
3999 tree mask_vec
= NULL_TREE
;
4002 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
4004 if (second_vec_index
== -1)
4005 second_vec_index
= first_vec_index
;
4007 /* Generate the permute statement if necessary. */
4008 tree first_vec
= dr_chain
[first_vec_index
];
4009 tree second_vec
= dr_chain
[second_vec_index
];
4014 = vect_create_destination_var (gimple_assign_lhs (stmt
),
4016 perm_dest
= make_ssa_name (perm_dest
);
4017 perm_stmt
= gimple_build_assign (perm_dest
,
4019 first_vec
, second_vec
,
4021 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4024 /* If mask was NULL_TREE generate the requested
4025 identity transform. */
4026 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
4028 /* Store the vector statement in NODE. */
4029 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
4033 first_vec_index
= -1;
4034 second_vec_index
= -1;
4043 typedef hash_map
<vec
<gimple
*>, slp_tree
,
4044 simple_hashmap_traits
<bst_traits
, slp_tree
> >
4045 scalar_stmts_to_slp_tree_map_t
;
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i;
  unsigned int j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }

  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
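  /* E.g. for the scalar group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } the
     node is code generated once with every lane using PLUS_EXPR and once
     with every lane using MINUS_EXPR; the two result vectors are then
     blended with a VEC_PERM_EXPR whose mask selects each lane from the
     vector computed with that lane's original operation.  For a four-lane
     vector that merge mask is { 0, 5, 2, 7 }, the usual addsub pattern.  */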
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gimple *ostmt;
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned k = 0, l;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */
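/* For example, a pure-SLP scalar call  _1 = foo (x_2);  is rewritten to
   _1 = 0;  so that the now-dead scalar lhs still has a defining
   statement.  */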
static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);
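      /* Remove the scalar stmts of this vectorized store group: walk at
	 most GROUP_SIZE scalar stmts of the SLP tree root, stop at the
	 first one without a data reference, and unlink each store together
	 with its stmt_vec_info.  */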
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);