1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
53 vect_free_slp_tree (slp_tree node
)
58 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
59 vect_free_slp_tree (child
);
62 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
63 /* After transform some stmts are removed and thus their vinfo is gone. */
64 if (vinfo_for_stmt (stmt
))
66 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) > 0);
67 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))--;
70 SLP_TREE_CHILDREN (node
).release ();
71 SLP_TREE_SCALAR_STMTS (node
).release ();
72 SLP_TREE_VEC_STMTS (node
).release ();
73 SLP_TREE_LOAD_PERMUTATION (node
).release ();
79 /* Free the memory allocated for the SLP instance. */
82 vect_free_slp_instance (slp_instance instance
)
84 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
85 SLP_INSTANCE_LOADS (instance
).release ();
90 /* Create an SLP node for SCALAR_STMTS. */
93 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
96 gimple
*stmt
= scalar_stmts
[0];
99 if (is_gimple_call (stmt
))
100 nops
= gimple_call_num_args (stmt
);
101 else if (is_gimple_assign (stmt
))
103 nops
= gimple_num_ops (stmt
) - 1;
104 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
107 else if (gimple_code (stmt
) == GIMPLE_PHI
)
112 node
= XNEW (struct _slp_tree
);
113 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
114 SLP_TREE_VEC_STMTS (node
).create (0);
115 SLP_TREE_CHILDREN (node
).create (nops
);
116 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
117 SLP_TREE_TWO_OPERATORS (node
) = false;
118 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
121 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt
)
122 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
))++;
128 /* This structure is used in creation of an SLP tree. Each instance
129 corresponds to the same operand in a group of scalar stmts in an SLP
131 typedef struct _slp_oprnd_info
133 /* Def-stmts for the operands. */
134 vec
<gimple
*> def_stmts
;
135 /* Information about the first statement, its vector def-type, type, the
136 operand itself in case it's constant, and an indication if it's a pattern
139 enum vect_def_type first_dt
;
145 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
147 static vec
<slp_oprnd_info
>
148 vect_create_oprnd_info (int nops
, int group_size
)
151 slp_oprnd_info oprnd_info
;
152 vec
<slp_oprnd_info
> oprnds_info
;
154 oprnds_info
.create (nops
);
155 for (i
= 0; i
< nops
; i
++)
157 oprnd_info
= XNEW (struct _slp_oprnd_info
);
158 oprnd_info
->def_stmts
.create (group_size
);
159 oprnd_info
->first_dt
= vect_uninitialized_def
;
160 oprnd_info
->first_op_type
= NULL_TREE
;
161 oprnd_info
->first_pattern
= false;
162 oprnd_info
->second_pattern
= false;
163 oprnds_info
.quick_push (oprnd_info
);
170 /* Free operands info. */
173 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
176 slp_oprnd_info oprnd_info
;
178 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
180 oprnd_info
->def_stmts
.release ();
181 XDELETE (oprnd_info
);
184 oprnds_info
.release ();
188 /* Find the place of the data-ref in STMT in the interleaving chain that starts
189 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
192 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
194 gimple
*next_stmt
= first_stmt
;
197 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
202 if (next_stmt
== stmt
)
204 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
206 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
213 /* Check whether it is possible to load COUNT elements of type ELT_MODE
214 using the method implemented by duplicate_and_interleave. Return true
215 if so, returning the number of intermediate vectors in *NVECTORS_OUT
216 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
220 can_duplicate_and_interleave_p (unsigned int count
, machine_mode elt_mode
,
221 unsigned int *nvectors_out
,
222 tree
*vector_type_out
,
225 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
227 unsigned int nvectors
= 1;
230 scalar_int_mode int_mode
;
231 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
232 if (multiple_p (current_vector_size
, elt_bytes
, &nelts
)
233 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
235 tree int_type
= build_nonstandard_integer_type
236 (GET_MODE_BITSIZE (int_mode
), 1);
237 tree vector_type
= build_vector_type (int_type
, nelts
);
238 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
240 vec_perm_builder
sel1 (nelts
, 2, 3);
241 vec_perm_builder
sel2 (nelts
, 2, 3);
242 poly_int64 half_nelts
= exact_div (nelts
, 2);
243 for (unsigned int i
= 0; i
< 3; ++i
)
246 sel1
.quick_push (i
+ nelts
);
247 sel2
.quick_push (half_nelts
+ i
);
248 sel2
.quick_push (half_nelts
+ i
+ nelts
);
250 vec_perm_indices
indices1 (sel1
, 2, nelts
);
251 vec_perm_indices
indices2 (sel2
, 2, nelts
);
252 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
253 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
256 *nvectors_out
= nvectors
;
258 *vector_type_out
= vector_type
;
261 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
263 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
270 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
276 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
277 they are of a valid type and that they match the defs of the first stmt of
278 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
279 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
280 indicates swap is required for cond_expr stmts. Specifically, *SWAP
281 is 1 if STMT is cond and operands of comparison need to be swapped;
282 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
283 If there is any operand swap in this function, *SWAP is set to non-zero
285 If there was a fatal error return -1; if the error could be corrected by
286 swapping operands of father node of this one, return 1; if everything is
289 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
290 vec
<gimple
*> stmts
, unsigned stmt_num
,
291 vec
<slp_oprnd_info
> *oprnds_info
)
293 gimple
*stmt
= stmts
[stmt_num
];
295 unsigned int i
, number_of_oprnds
;
297 enum vect_def_type dt
= vect_uninitialized_def
;
298 bool pattern
= false;
299 slp_oprnd_info oprnd_info
;
300 int first_op_idx
= 1;
301 bool commutative
= false;
302 bool first_op_cond
= false;
303 bool first
= stmt_num
== 0;
304 bool second
= stmt_num
== 1;
306 if (is_gimple_call (stmt
))
308 number_of_oprnds
= gimple_call_num_args (stmt
);
311 else if (is_gimple_assign (stmt
))
313 enum tree_code code
= gimple_assign_rhs_code (stmt
);
314 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
315 /* Swap can only be done for cond_expr if asked to, otherwise we
316 could result in different comparison code to the first stmt. */
317 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
318 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
320 first_op_cond
= true;
324 commutative
= commutative_tree_code (code
);
329 bool swapped
= (*swap
!= 0);
330 gcc_assert (!swapped
|| first_op_cond
);
331 for (i
= 0; i
< number_of_oprnds
; i
++)
336 /* Map indicating how operands of cond_expr should be swapped. */
337 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
338 int *map
= maps
[*swap
];
341 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
), map
[i
]);
343 oprnd
= gimple_op (stmt
, map
[i
]);
346 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
348 oprnd_info
= (*oprnds_info
)[i
];
350 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
352 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
355 "Build SLP failed: can't analyze def for ");
356 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
357 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
363 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
364 from the pattern. Check that all the stmts of the node are in the
366 if (def_stmt
&& gimple_bb (def_stmt
)
367 && vect_stmt_in_region_p (vinfo
, def_stmt
)
368 && vinfo_for_stmt (def_stmt
)
369 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
370 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
371 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
374 if (!first
&& !oprnd_info
->first_pattern
375 /* Allow different pattern state for the defs of the
376 first stmt in reduction chains. */
377 && (oprnd_info
->first_dt
!= vect_reduction_def
378 || (!second
&& !oprnd_info
->second_pattern
)))
388 if (dump_enabled_p ())
390 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
391 "Build SLP failed: some of the stmts"
392 " are in a pattern, and others are not ");
393 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
394 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
400 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
401 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
403 if (dt
== vect_unknown_def_type
)
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
407 "Unsupported pattern.\n");
411 switch (gimple_code (def_stmt
))
418 if (dump_enabled_p ())
419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
420 "unsupported defining stmt:\n");
426 oprnd_info
->second_pattern
= pattern
;
430 oprnd_info
->first_dt
= dt
;
431 oprnd_info
->first_pattern
= pattern
;
432 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
436 /* Not first stmt of the group, check that the def-stmt/s match
437 the def-stmt/s of the first stmt. Allow different definition
438 types for reduction chains: the first stmt must be a
439 vect_reduction_def (a phi node), and the rest
440 vect_internal_def. */
441 tree type
= TREE_TYPE (oprnd
);
442 if ((oprnd_info
->first_dt
!= dt
443 && !(oprnd_info
->first_dt
== vect_reduction_def
444 && dt
== vect_internal_def
)
445 && !((oprnd_info
->first_dt
== vect_external_def
446 || oprnd_info
->first_dt
== vect_constant_def
)
447 && (dt
== vect_external_def
448 || dt
== vect_constant_def
)))
449 || !types_compatible_p (oprnd_info
->first_op_type
, type
))
451 /* Try swapping operands if we got a mismatch. */
460 if (dump_enabled_p ())
461 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
462 "Build SLP failed: different types\n");
466 if ((dt
== vect_constant_def
467 || dt
== vect_external_def
)
468 && !current_vector_size
.is_constant ()
469 && (TREE_CODE (type
) == BOOLEAN_TYPE
470 || !can_duplicate_and_interleave_p (stmts
.length (),
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
476 "Build SLP failed: invalid type of def "
477 "for variable-length SLP ");
478 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
479 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
485 /* Check the types of the definitions. */
488 case vect_constant_def
:
489 case vect_external_def
:
492 case vect_reduction_def
:
493 case vect_induction_def
:
494 case vect_internal_def
:
495 oprnd_info
->def_stmts
.quick_push (def_stmt
);
499 /* FORNOW: Not supported. */
500 if (dump_enabled_p ())
502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
503 "Build SLP failed: illegal type of def ");
504 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
505 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
515 /* If there are already uses of this stmt in a SLP instance then
516 we've committed to the operand order and can't swap it. */
517 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt
)) != 0)
519 if (dump_enabled_p ())
521 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
522 "Build SLP failed: cannot swap operands of "
524 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
531 tree cond
= gimple_assign_rhs1 (stmt
);
532 enum tree_code code
= TREE_CODE (cond
);
537 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
538 &TREE_OPERAND (cond
, 1));
539 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
544 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
545 gimple_assign_rhs3_ptr (stmt
));
546 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
547 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
548 gcc_assert (code
!= ERROR_MARK
);
549 TREE_SET_CODE (cond
, code
);
553 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
554 gimple_assign_rhs2_ptr (stmt
));
555 if (dump_enabled_p ())
557 dump_printf_loc (MSG_NOTE
, vect_location
,
558 "swapped operands to match def types in ");
559 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
567 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
568 caller's attempt to find the vector type in STMT with the narrowest
569 element type. Return true if VECTYPE is nonnull and if it is valid
570 for VINFO. When returning true, update MAX_NUNITS to reflect the
571 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
572 as for vect_build_slp_tree. */
575 vect_record_max_nunits (vec_info
*vinfo
, gimple
*stmt
, unsigned int group_size
,
576 tree vectype
, poly_uint64
*max_nunits
)
580 if (dump_enabled_p ())
582 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
583 "Build SLP failed: unsupported data-type in ");
584 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
585 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
587 /* Fatal mismatch. */
591 /* If populating the vector type requires unrolling then fail
592 before adjusting *max_nunits for basic-block vectorization. */
593 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
594 unsigned HOST_WIDE_INT const_nunits
;
595 if (is_a
<bb_vec_info
> (vinfo
)
596 && (!nunits
.is_constant (&const_nunits
)
597 || const_nunits
> group_size
))
599 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
600 "Build SLP failed: unrolling required "
601 "in basic block SLP\n");
602 /* Fatal mismatch. */
606 /* In case of multiple types we need to detect the smallest type. */
607 vect_update_max_nunits (max_nunits
, vectype
);
611 /* Verify if the scalar stmts STMTS are isomorphic, require data
612 permutation or are of unsupported types of operation. Return
613 true if they are, otherwise return false and indicate in *MATCHES
614 which stmts are not isomorphic to the first one. If MATCHES[0]
615 is false then this indicates the comparison could not be
616 carried out or the stmts will never be vectorized by SLP.
618 Note COND_EXPR is possibly ismorphic to another one after swapping its
619 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
620 the first stmt by swapping the two operands of comparison; set SWAP[i]
621 to 2 if stmt I is isormorphic to the first stmt by inverting the code
622 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
623 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
626 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
627 vec
<gimple
*> stmts
, unsigned int group_size
,
628 unsigned nops
, poly_uint64
*max_nunits
,
629 bool *matches
, bool *two_operators
)
632 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
633 enum tree_code first_stmt_code
= ERROR_MARK
;
634 enum tree_code alt_stmt_code
= ERROR_MARK
;
635 enum tree_code rhs_code
= ERROR_MARK
;
636 enum tree_code first_cond_code
= ERROR_MARK
;
638 bool need_same_oprnds
= false;
639 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
642 machine_mode optab_op2_mode
;
643 machine_mode vec_mode
;
645 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
647 /* For every stmt in NODE find its def stmt/s. */
648 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
653 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
656 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
659 /* Fail to vectorize statements marked as unvectorizable. */
660 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
665 "Build SLP failed: unvectorizable statement ");
666 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
668 /* Fatal mismatch. */
673 lhs
= gimple_get_lhs (stmt
);
674 if (lhs
== NULL_TREE
)
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
679 "Build SLP failed: not GIMPLE_ASSIGN nor "
681 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
683 /* Fatal mismatch. */
688 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
689 vectype
= get_vectype_for_scalar_type (scalar_type
);
690 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
693 /* Fatal mismatch. */
698 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
700 rhs_code
= CALL_EXPR
;
701 if (gimple_call_internal_p (call_stmt
)
702 || gimple_call_tail_p (call_stmt
)
703 || gimple_call_noreturn_p (call_stmt
)
704 || !gimple_call_nothrow_p (call_stmt
)
705 || gimple_call_chain (call_stmt
))
707 if (dump_enabled_p ())
709 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
710 "Build SLP failed: unsupported call type ");
711 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
714 /* Fatal mismatch. */
720 rhs_code
= gimple_assign_rhs_code (stmt
);
722 /* Check the operation. */
725 first_stmt_code
= rhs_code
;
727 /* Shift arguments should be equal in all the packed stmts for a
728 vector shift with scalar shift operand. */
729 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
730 || rhs_code
== LROTATE_EXPR
731 || rhs_code
== RROTATE_EXPR
)
733 vec_mode
= TYPE_MODE (vectype
);
735 /* First see if we have a vector/vector shift. */
736 optab
= optab_for_tree_code (rhs_code
, vectype
,
740 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
742 /* No vector/vector shift, try for a vector/scalar shift. */
743 optab
= optab_for_tree_code (rhs_code
, vectype
,
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
750 "Build SLP failed: no optab.\n");
751 /* Fatal mismatch. */
755 icode
= (int) optab_handler (optab
, vec_mode
);
756 if (icode
== CODE_FOR_nothing
)
758 if (dump_enabled_p ())
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
761 "op not supported by target.\n");
762 /* Fatal mismatch. */
766 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
767 if (!VECTOR_MODE_P (optab_op2_mode
))
769 need_same_oprnds
= true;
770 first_op1
= gimple_assign_rhs2 (stmt
);
774 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
776 need_same_oprnds
= true;
777 first_op1
= gimple_assign_rhs2 (stmt
);
782 if (first_stmt_code
!= rhs_code
783 && alt_stmt_code
== ERROR_MARK
)
784 alt_stmt_code
= rhs_code
;
785 if (first_stmt_code
!= rhs_code
786 && (first_stmt_code
!= IMAGPART_EXPR
787 || rhs_code
!= REALPART_EXPR
)
788 && (first_stmt_code
!= REALPART_EXPR
789 || rhs_code
!= IMAGPART_EXPR
)
790 /* Handle mismatches in plus/minus by computing both
791 and merging the results. */
792 && !((first_stmt_code
== PLUS_EXPR
793 || first_stmt_code
== MINUS_EXPR
)
794 && (alt_stmt_code
== PLUS_EXPR
795 || alt_stmt_code
== MINUS_EXPR
)
796 && rhs_code
== alt_stmt_code
)
797 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
798 && (first_stmt_code
== ARRAY_REF
799 || first_stmt_code
== BIT_FIELD_REF
800 || first_stmt_code
== INDIRECT_REF
801 || first_stmt_code
== COMPONENT_REF
802 || first_stmt_code
== MEM_REF
)))
804 if (dump_enabled_p ())
806 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
807 "Build SLP failed: different operation "
809 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
812 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
820 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
822 if (dump_enabled_p ())
824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
825 "Build SLP failed: different shift "
827 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
833 if (rhs_code
== CALL_EXPR
)
835 gimple
*first_stmt
= stmts
[0];
836 if (gimple_call_num_args (stmt
) != nops
837 || !operand_equal_p (gimple_call_fn (first_stmt
),
838 gimple_call_fn (stmt
), 0)
839 || gimple_call_fntype (first_stmt
)
840 != gimple_call_fntype (stmt
))
842 if (dump_enabled_p ())
844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
845 "Build SLP failed: different calls in ");
846 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
855 /* Grouped store or load. */
856 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
858 if (REFERENCE_CLASS_P (lhs
))
866 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
869 /* Check that there are no loads from different interleaving
870 chains in the same node. */
871 if (prev_first_load
!= first_load
)
873 if (dump_enabled_p ())
875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
877 "Build SLP failed: different "
878 "interleaving chains in one node ");
879 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
887 prev_first_load
= first_load
;
889 } /* Grouped access. */
892 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
894 /* Not grouped load. */
895 if (dump_enabled_p ())
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
898 "Build SLP failed: not grouped load ");
899 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
902 /* FORNOW: Not grouped loads are not supported. */
903 /* Fatal mismatch. */
908 /* Not memory operation. */
909 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
910 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
911 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
912 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
913 && rhs_code
!= CALL_EXPR
)
915 if (dump_enabled_p ())
917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
918 "Build SLP failed: operation");
919 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
920 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
922 /* Fatal mismatch. */
927 if (rhs_code
== COND_EXPR
)
929 tree cond_expr
= gimple_assign_rhs1 (stmt
);
930 enum tree_code cond_code
= TREE_CODE (cond_expr
);
931 enum tree_code swap_code
= ERROR_MARK
;
932 enum tree_code invert_code
= ERROR_MARK
;
935 first_cond_code
= TREE_CODE (cond_expr
);
936 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
938 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
939 swap_code
= swap_tree_comparison (cond_code
);
940 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
943 if (first_cond_code
== cond_code
)
945 /* Isomorphic can be achieved by swapping. */
946 else if (first_cond_code
== swap_code
)
948 /* Isomorphic can be achieved by inverting. */
949 else if (first_cond_code
== invert_code
)
953 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
956 "Build SLP failed: different"
958 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
970 for (i
= 0; i
< group_size
; ++i
)
974 /* If we allowed a two-operation SLP node verify the target can cope
975 with the permute we are going to use. */
976 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
977 if (alt_stmt_code
!= ERROR_MARK
978 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
980 unsigned HOST_WIDE_INT count
;
981 if (!nunits
.is_constant (&count
))
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
985 "Build SLP failed: different operations "
986 "not allowed with variable-length SLP.\n");
989 vec_perm_builder
sel (count
, count
, 1);
990 for (i
= 0; i
< count
; ++i
)
992 unsigned int elt
= i
;
993 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
995 sel
.quick_push (elt
);
997 vec_perm_indices
indices (sel
, 2, count
);
998 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1000 for (i
= 0; i
< group_size
; ++i
)
1001 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
1004 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1007 "Build SLP failed: different operation "
1009 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1013 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1019 *two_operators
= true;
1025 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1026 Note we never remove apart from at destruction time so we do not
1027 need a special value for deleted that differs from empty. */
1030 typedef vec
<gimple
*> value_type
;
1031 typedef vec
<gimple
*> compare_type
;
1032 static inline hashval_t
hash (value_type
);
1033 static inline bool equal (value_type existing
, value_type candidate
);
1034 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1035 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1036 static inline void mark_empty (value_type
&x
) { x
.release (); }
1037 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1038 static inline void remove (value_type
&x
) { x
.release (); }
1041 bst_traits::hash (value_type x
)
1044 for (unsigned i
= 0; i
< x
.length (); ++i
)
1045 h
.add_int (gimple_uid (x
[i
]));
1049 bst_traits::equal (value_type existing
, value_type candidate
)
1051 if (existing
.length () != candidate
.length ())
1053 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1054 if (existing
[i
] != candidate
[i
])
1059 typedef hash_set
<vec
<gimple
*>, bst_traits
> scalar_stmts_set_t
;
1060 static scalar_stmts_set_t
*bst_fail
;
1063 vect_build_slp_tree_2 (vec_info
*vinfo
,
1064 vec
<gimple
*> stmts
, unsigned int group_size
,
1065 poly_uint64
*max_nunits
,
1066 vec
<slp_tree
> *loads
,
1067 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1068 unsigned max_tree_size
);
1071 vect_build_slp_tree (vec_info
*vinfo
,
1072 vec
<gimple
*> stmts
, unsigned int group_size
,
1073 poly_uint64
*max_nunits
, vec
<slp_tree
> *loads
,
1074 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1075 unsigned max_tree_size
)
1077 if (bst_fail
->contains (stmts
))
1079 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1080 loads
, matches
, npermutes
, tree_size
,
1082 /* When SLP build fails for stmts record this, otherwise SLP build
1083 can be exponential in time when we allow to construct parts from
1084 scalars, see PR81723. */
1088 x
.create (stmts
.length ());
1095 /* Recursively build an SLP tree starting from NODE.
1096 Fail (and return a value not equal to zero) if def-stmts are not
1097 isomorphic, require data permutation or are of unsupported types of
1098 operation. Otherwise, return 0.
1099 The value returned is the depth in the SLP tree where a mismatch
1103 vect_build_slp_tree_2 (vec_info
*vinfo
,
1104 vec
<gimple
*> stmts
, unsigned int group_size
,
1105 poly_uint64
*max_nunits
,
1106 vec
<slp_tree
> *loads
,
1107 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1108 unsigned max_tree_size
)
1110 unsigned nops
, i
, this_tree_size
= 0;
1111 poly_uint64 this_max_nunits
= *max_nunits
;
1118 if (is_gimple_call (stmt
))
1119 nops
= gimple_call_num_args (stmt
);
1120 else if (is_gimple_assign (stmt
))
1122 nops
= gimple_num_ops (stmt
) - 1;
1123 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1126 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1131 /* If the SLP node is a PHI (induction or reduction), terminate
1133 if (gimple_code (stmt
) == GIMPLE_PHI
)
1135 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1136 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
1137 if (!vect_record_max_nunits (vinfo
, stmt
, group_size
, vectype
,
1141 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
));
1142 /* Induction from different IVs is not supported. */
1143 if (def_type
== vect_induction_def
)
1145 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1146 if (stmt
!= stmts
[0])
1151 /* Else def types have to match. */
1152 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1154 /* But for reduction chains only check on the first stmt. */
1155 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
1156 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) != stmt
)
1158 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) != def_type
)
1162 node
= vect_create_new_slp_node (stmts
);
1167 bool two_operators
= false;
1168 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1169 if (!vect_build_slp_tree_1 (vinfo
, swap
,
1170 stmts
, group_size
, nops
,
1171 &this_max_nunits
, matches
, &two_operators
))
1174 /* If the SLP node is a load, terminate the recursion. */
1175 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
1176 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
1178 *max_nunits
= this_max_nunits
;
1179 node
= vect_create_new_slp_node (stmts
);
1180 loads
->safe_push (node
);
1184 /* Get at the operands, verifying they are compatible. */
1185 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1186 slp_oprnd_info oprnd_info
;
1187 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1189 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1190 stmts
, i
, &oprnds_info
);
1192 matches
[(res
== -1) ? 0 : i
] = false;
1196 for (i
= 0; i
< group_size
; ++i
)
1199 vect_free_oprnd_info (oprnds_info
);
1203 auto_vec
<slp_tree
, 4> children
;
1204 auto_vec
<slp_tree
> this_loads
;
1209 max_tree_size
-= *tree_size
;
1211 /* Create SLP_TREE nodes for the definition node/s. */
1212 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1215 unsigned old_nloads
= this_loads
.length ();
1216 unsigned old_tree_size
= this_tree_size
;
1219 if (oprnd_info
->first_dt
!= vect_internal_def
1220 && oprnd_info
->first_dt
!= vect_reduction_def
1221 && oprnd_info
->first_dt
!= vect_induction_def
)
1224 if (++this_tree_size
> max_tree_size
)
1226 FOR_EACH_VEC_ELT (children
, j
, child
)
1227 vect_free_slp_tree (child
);
1228 vect_free_oprnd_info (oprnds_info
);
1232 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1233 group_size
, &this_max_nunits
,
1234 &this_loads
, matches
, npermutes
,
1236 max_tree_size
)) != NULL
)
1238 /* If we have all children of child built up from scalars then just
1239 throw that away and build it up this node from scalars. */
1240 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1241 /* ??? Rejecting patterns this way doesn't work. We'd have to
1242 do extra work to cancel the pattern so the uses see the
1244 && !is_pattern_stmt_p
1245 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1247 slp_tree grandchild
;
1249 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1250 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1255 this_loads
.truncate (old_nloads
);
1256 this_tree_size
= old_tree_size
;
1257 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1258 vect_free_slp_tree (grandchild
);
1259 SLP_TREE_CHILDREN (child
).truncate (0);
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "Building parent vector operands from "
1263 "scalars instead\n");
1264 oprnd_info
->def_stmts
= vNULL
;
1265 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1266 children
.safe_push (child
);
1271 oprnd_info
->def_stmts
= vNULL
;
1272 children
.safe_push (child
);
1276 /* If the SLP build failed fatally and we analyze a basic-block
1277 simply treat nodes we fail to build as externally defined
1278 (and thus build vectors from the scalar defs).
1279 The cost model will reject outright expensive cases.
1280 ??? This doesn't treat cases where permutation ultimatively
1281 fails (or we don't try permutation below). Ideally we'd
1282 even compute a permutation that will end up with the maximum
1284 if (is_a
<bb_vec_info
> (vinfo
)
1286 /* ??? Rejecting patterns this way doesn't work. We'd have to
1287 do extra work to cancel the pattern so the uses see the
1289 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1291 dump_printf_loc (MSG_NOTE
, vect_location
,
1292 "Building vector operands from scalars\n");
1293 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1294 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1295 children
.safe_push (child
);
1296 oprnd_info
->def_stmts
= vNULL
;
1300 /* If the SLP build for operand zero failed and operand zero
1301 and one can be commutated try that for the scalar stmts
1302 that failed the match. */
1304 /* A first scalar stmt mismatch signals a fatal mismatch. */
1306 /* ??? For COND_EXPRs we can swap the comparison operands
1307 as well as the arms under some constraints. */
1309 && oprnds_info
[1]->first_dt
== vect_internal_def
1310 && is_gimple_assign (stmt
)
1311 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1313 /* Do so only if the number of not successful permutes was nor more
1314 than a cut-ff as re-trying the recursive match on
1315 possibly each level of the tree would expose exponential
1319 /* Verify if we can safely swap or if we committed to a specific
1320 operand order already. */
1321 for (j
= 0; j
< group_size
; ++j
)
1324 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts
[j
]))))
1326 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1329 "Build SLP failed: cannot swap operands "
1331 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1337 /* Swap mismatched definition stmts. */
1338 dump_printf_loc (MSG_NOTE
, vect_location
,
1339 "Re-trying with swapped operands of stmts ");
1340 for (j
= 0; j
< group_size
; ++j
)
1343 std::swap (oprnds_info
[0]->def_stmts
[j
],
1344 oprnds_info
[1]->def_stmts
[j
]);
1345 dump_printf (MSG_NOTE
, "%d ", j
);
1347 dump_printf (MSG_NOTE
, "\n");
1348 /* And try again with scratch 'matches' ... */
1349 bool *tem
= XALLOCAVEC (bool, group_size
);
1350 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1351 group_size
, &this_max_nunits
,
1352 &this_loads
, tem
, npermutes
,
1354 max_tree_size
)) != NULL
)
1356 /* ... so if successful we can apply the operand swapping
1357 to the GIMPLE IL. This is necessary because for example
1358 vect_get_slp_defs uses operand indexes and thus expects
1359 canonical operand order. This is also necessary even
1360 if we end up building the operand from scalars as
1361 we'll continue to process swapped operand two. */
1362 for (j
= 0; j
< group_size
; ++j
)
1364 gimple
*stmt
= stmts
[j
];
1365 gimple_set_plf (stmt
, GF_PLF_1
, false);
1367 for (j
= 0; j
< group_size
; ++j
)
1369 gimple
*stmt
= stmts
[j
];
1372 /* Avoid swapping operands twice. */
1373 if (gimple_plf (stmt
, GF_PLF_1
))
1375 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1376 gimple_assign_rhs2_ptr (stmt
));
1377 gimple_set_plf (stmt
, GF_PLF_1
, true);
1380 /* Verify we swap all duplicates or none. */
1382 for (j
= 0; j
< group_size
; ++j
)
1384 gimple
*stmt
= stmts
[j
];
1385 gcc_assert (gimple_plf (stmt
, GF_PLF_1
) == ! matches
[j
]);
1388 /* If we have all children of child built up from scalars then
1389 just throw that away and build it up this node from scalars. */
1390 if (!SLP_TREE_CHILDREN (child
).is_empty ()
1391 /* ??? Rejecting patterns this way doesn't work. We'd have
1392 to do extra work to cancel the pattern so the uses see the
1394 && !is_pattern_stmt_p
1395 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0])))
1398 slp_tree grandchild
;
1400 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1401 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1406 this_loads
.truncate (old_nloads
);
1407 this_tree_size
= old_tree_size
;
1408 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1409 vect_free_slp_tree (grandchild
);
1410 SLP_TREE_CHILDREN (child
).truncate (0);
1412 dump_printf_loc (MSG_NOTE
, vect_location
,
1413 "Building parent vector operands from "
1414 "scalars instead\n");
1415 oprnd_info
->def_stmts
= vNULL
;
1416 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1417 children
.safe_push (child
);
1422 oprnd_info
->def_stmts
= vNULL
;
1423 children
.safe_push (child
);
1431 gcc_assert (child
== NULL
);
1432 FOR_EACH_VEC_ELT (children
, j
, child
)
1433 vect_free_slp_tree (child
);
1434 vect_free_oprnd_info (oprnds_info
);
1438 vect_free_oprnd_info (oprnds_info
);
1441 *tree_size
+= this_tree_size
;
1442 *max_nunits
= this_max_nunits
;
1443 loads
->safe_splice (this_loads
);
1445 node
= vect_create_new_slp_node (stmts
);
1446 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1447 SLP_TREE_CHILDREN (node
).splice (children
);
1451 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1454 vect_print_slp_tree (dump_flags_t dump_kind
, location_t loc
, slp_tree node
)
1460 dump_printf_loc (dump_kind
, loc
, "node%s\n",
1461 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1462 ? " (external)" : "");
1463 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1465 dump_printf_loc (dump_kind
, loc
, "\tstmt %d ", i
);
1466 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1469 vect_print_slp_tree (dump_kind
, loc
, child
);
1473 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1474 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1475 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1476 stmts in NODE are to be marked. */
1479 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1485 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1488 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1489 if (j
< 0 || i
== j
)
1490 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1492 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1493 vect_mark_slp_stmts (child
, mark
, j
);
1497 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1500 vect_mark_slp_stmts_relevant (slp_tree node
)
1504 stmt_vec_info stmt_info
;
1507 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1510 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1512 stmt_info
= vinfo_for_stmt (stmt
);
1513 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1514 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1515 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1518 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1519 vect_mark_slp_stmts_relevant (child
);
1523 /* Rearrange the statements of NODE according to PERMUTATION. */
1526 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1527 vec
<unsigned> permutation
)
1530 vec
<gimple
*> tmp_stmts
;
1534 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1535 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1537 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1538 tmp_stmts
.create (group_size
);
1539 tmp_stmts
.quick_grow_cleared (group_size
);
1541 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1542 tmp_stmts
[permutation
[i
]] = stmt
;
1544 SLP_TREE_SCALAR_STMTS (node
).release ();
1545 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1549 /* Attempt to reorder stmts in a reduction chain so that we don't
1550 require any load permutation. Return true if that was possible,
1551 otherwise return false. */
1554 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1556 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1559 slp_tree node
, load
;
1561 /* Compare all the permutation sequences to the first one. We know
1562 that at least one load is permuted. */
1563 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1564 if (!node
->load_permutation
.exists ())
1566 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1568 if (!load
->load_permutation
.exists ())
1570 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1571 if (lidx
!= node
->load_permutation
[j
])
1575 /* Check that the loads in the first sequence are different and there
1576 are no gaps between them. */
1577 auto_sbitmap
load_index (group_size
);
1578 bitmap_clear (load_index
);
1579 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1581 if (lidx
>= group_size
)
1583 if (bitmap_bit_p (load_index
, lidx
))
1586 bitmap_set_bit (load_index
, lidx
);
1588 for (i
= 0; i
< group_size
; i
++)
1589 if (!bitmap_bit_p (load_index
, i
))
1592 /* This permutation is valid for reduction. Since the order of the
1593 statements in the nodes is not important unless they are memory
1594 accesses, we can rearrange the statements in all the nodes
1595 according to the order of the loads. */
1596 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1597 node
->load_permutation
);
1599 /* We are done, no actual permutations need to be generated. */
1600 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1601 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1603 gimple
*first_stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1604 first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
));
1605 /* But we have to keep those permutations that are required because
1606 of handling of gaps. */
1607 if (known_eq (unrolling_factor
, 1U)
1608 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1609 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0))
1610 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1612 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1613 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1619 /* Check if the required load permutations in the SLP instance
1620 SLP_INSTN are supported. */
1623 vect_supported_load_permutation_p (slp_instance slp_instn
)
1625 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1626 unsigned int i
, j
, k
, next
;
1628 gimple
*stmt
, *load
, *next_load
;
1630 if (dump_enabled_p ())
1632 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1633 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1634 if (node
->load_permutation
.exists ())
1635 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1636 dump_printf (MSG_NOTE
, "%d ", next
);
1638 for (k
= 0; k
< group_size
; ++k
)
1639 dump_printf (MSG_NOTE
, "%d ", k
);
1640 dump_printf (MSG_NOTE
, "\n");
1643 /* In case of reduction every load permutation is allowed, since the order
1644 of the reduction statements is not important (as opposed to the case of
1645 grouped stores). The only condition we need to check is that all the
1646 load nodes are of the same size and have the same permutation (and then
1647 rearrange all the nodes of the SLP instance according to this
1650 /* Check that all the load nodes are of the same size. */
1651 /* ??? Can't we assert this? */
1652 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1653 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1656 node
= SLP_INSTANCE_TREE (slp_instn
);
1657 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1659 /* Reduction (there are no data-refs in the root).
1660 In reduction chain the order of the loads is not important. */
1661 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1662 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1663 vect_attempt_slp_rearrange_stmts (slp_instn
);
1665 /* In basic block vectorization we allow any subchain of an interleaving
1667 FORNOW: not supported in loop SLP because of realignment compications. */
1668 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
1670 /* Check whether the loads in an instance form a subchain and thus
1671 no permutation is necessary. */
1672 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1674 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1676 bool subchain_p
= true;
1678 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1681 && (next_load
!= load
1682 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1687 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1690 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1693 stmt_vec_info group_info
1694 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1695 group_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info
));
1696 unsigned HOST_WIDE_INT nunits
;
1697 unsigned k
, maxk
= 0;
1698 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1701 /* In BB vectorization we may not actually use a loaded vector
1702 accessing elements in excess of GROUP_SIZE. */
1703 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1704 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1705 || maxk
>= (GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1708 "BB vectorization with gaps at the end of "
1709 "a load is not supported\n");
1713 /* Verify the permutation can be generated. */
1716 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1717 1, slp_instn
, true, &n_perms
))
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1721 "unsupported load permutation\n");
1729 /* For loop vectorization verify we can generate the permutation. Be
1730 conservative about the vectorization factor, there are permutations
1731 that will use three vector inputs only starting from a specific factor
1732 and the vectorization factor is not yet final.
1733 ??? The SLP instance unrolling factor might not be the maximum one. */
1736 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1737 LOOP_VINFO_VECT_FACTOR
1738 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt
))));
1739 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1740 if (node
->load_permutation
.exists ()
1741 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1742 slp_instn
, true, &n_perms
))
1749 /* Find the last store in SLP INSTANCE. */
1752 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1754 gimple
*last
= NULL
, *stmt
;
1756 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1758 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1759 if (is_pattern_stmt_p (stmt_vinfo
))
1760 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1762 last
= get_later_stmt (stmt
, last
);
1768 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1771 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1772 stmt_vector_for_cost
*prologue_cost_vec
,
1773 stmt_vector_for_cost
*body_cost_vec
,
1774 unsigned ncopies_for_cost
,
1775 scalar_stmts_set_t
* visited
)
1780 stmt_vec_info stmt_info
;
1783 /* If we already costed the exact same set of scalar stmts we're done.
1784 We share the generated vector stmts for those. */
1785 if (visited
->contains (SLP_TREE_SCALAR_STMTS (node
)))
1788 visited
->add (SLP_TREE_SCALAR_STMTS (node
).copy ());
1790 /* Recurse down the SLP tree. */
1791 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1792 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1793 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1794 body_cost_vec
, ncopies_for_cost
, visited
);
1796 /* Look at the first scalar stmt to determine the cost. */
1797 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1798 stmt_info
= vinfo_for_stmt (stmt
);
1799 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1801 vect_memory_access_type memory_access_type
1802 = (STMT_VINFO_STRIDED_P (stmt_info
)
1805 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1806 vect_model_store_cost (stmt_info
, ncopies_for_cost
,
1807 memory_access_type
, VLS_STORE
,
1808 node
, prologue_cost_vec
, body_cost_vec
);
1811 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1812 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1814 /* If the load is permuted then the alignment is determined by
1815 the first group element not by the first scalar stmt DR. */
1816 stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1817 stmt_info
= vinfo_for_stmt (stmt
);
1818 /* Record the cost for the permutation. */
1820 vect_transform_slp_perm_load (node
, vNULL
, NULL
,
1821 ncopies_for_cost
, instance
, true,
1823 record_stmt_cost (body_cost_vec
, n_perms
, vec_perm
,
1824 stmt_info
, 0, vect_body
);
1825 unsigned assumed_nunits
1826 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info
));
1827 /* And adjust the number of loads performed. This handles
1828 redundancies as well as loads that are later dead. */
1829 auto_sbitmap
perm (GROUP_SIZE (stmt_info
));
1830 bitmap_clear (perm
);
1831 for (i
= 0; i
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++i
)
1832 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (node
)[i
]);
1833 ncopies_for_cost
= 0;
1834 bool load_seen
= false;
1835 for (i
= 0; i
< GROUP_SIZE (stmt_info
); ++i
)
1837 if (i
% assumed_nunits
== 0)
1843 if (bitmap_bit_p (perm
, i
))
1848 gcc_assert (ncopies_for_cost
1849 <= (GROUP_SIZE (stmt_info
) - GROUP_GAP (stmt_info
)
1850 + assumed_nunits
- 1) / assumed_nunits
);
1851 poly_uint64 uf
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1852 ncopies_for_cost
*= estimated_poly_value (uf
);
1854 /* Record the cost for the vector loads. */
1855 vect_model_load_cost (stmt_info
, ncopies_for_cost
,
1856 memory_access_type
, node
, prologue_cost_vec
,
1861 else if (STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
)
1863 /* ncopies_for_cost is the number of IVs we generate. */
1864 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1865 stmt_info
, 0, vect_body
);
1867 /* Prologue cost for the initial values and step vector. */
1868 record_stmt_cost (prologue_cost_vec
, ncopies_for_cost
,
1870 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1872 ? vector_load
: vec_construct
,
1873 stmt_info
, 0, vect_prologue
);
1874 record_stmt_cost (prologue_cost_vec
, 1,
1876 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info
))
1877 ? vector_load
: vec_construct
,
1878 stmt_info
, 0, vect_prologue
);
1880 /* ??? No easy way to get at the actual number of vector stmts
1881 to be geneated and thus the derived IVs. */
1885 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1886 stmt_info
, 0, vect_body
);
1887 if (SLP_TREE_TWO_OPERATORS (node
))
1889 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1890 stmt_info
, 0, vect_body
);
1891 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1892 stmt_info
, 0, vect_body
);
1896 /* Push SLP node def-type to stmts. */
1897 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1898 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1899 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1900 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
1902 /* Scan operands and account for prologue cost of constants/externals.
1903 ??? This over-estimates cost for multiple uses and should be
1905 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1906 lhs
= gimple_get_lhs (stmt
);
1907 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1909 tree op
= gimple_op (stmt
, i
);
1911 enum vect_def_type dt
;
1912 if (!op
|| op
== lhs
)
1914 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1916 /* Without looking at the actual initializer a vector of
1917 constants can be implemented as load from the constant pool.
1918 ??? We need to pass down stmt_info for a vector type
1919 even if it points to the wrong stmt. */
1920 if (dt
== vect_constant_def
)
1921 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1922 stmt_info
, 0, vect_prologue
);
1923 else if (dt
== vect_external_def
)
1924 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1925 stmt_info
, 0, vect_prologue
);
1929 /* Restore stmt def-types. */
1930 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1931 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
1932 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
1933 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
1936 /* Compute the cost for the SLP instance INSTANCE. */
1939 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1941 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1942 unsigned ncopies_for_cost
;
1943 stmt_info_for_cost
*si
;
1946 if (dump_enabled_p ())
1947 dump_printf_loc (MSG_NOTE
, vect_location
,
1948 "=== vect_analyze_slp_cost ===\n");
1950 /* Calculate the number of vector stmts to create based on the unrolling
1951 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1952 GROUP_SIZE / NUNITS otherwise. */
1953 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1954 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1955 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1956 /* Get the estimated vectorization factor, which is always one for
1957 basic-block vectorization. */
1958 unsigned int assumed_vf
;
1959 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1960 assumed_vf
= vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info
));
1963 /* For reductions look at a reduction operand in case the reduction
1964 operation is widening like DOT_PROD or SAD. */
1965 tree vectype_for_cost
= STMT_VINFO_VECTYPE (stmt_info
);
1966 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1968 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1969 switch (gimple_assign_rhs_code (stmt
))
1973 vectype_for_cost
= get_vectype_for_scalar_type
1974 (TREE_TYPE (gimple_assign_rhs1 (stmt
)));
1979 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype_for_cost
);
1980 ncopies_for_cost
= (least_common_multiple (assumed_nunits
,
1981 group_size
* assumed_vf
)
1984 prologue_cost_vec
.create (10);
1985 body_cost_vec
.create (10);
1986 scalar_stmts_set_t
*visited
= new scalar_stmts_set_t ();
1987 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1988 &prologue_cost_vec
, &body_cost_vec
,
1989 ncopies_for_cost
, visited
);
1992 /* Record the prologue costs, which were delayed until we were
1993 sure that SLP was successful. */
1994 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1996 struct _stmt_vec_info
*stmt_info
1997 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1998 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1999 si
->misalign
, vect_prologue
);
2002 /* Record the instance's instructions in the target cost model. */
2003 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
2005 struct _stmt_vec_info
*stmt_info
2006 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
2007 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
2008 si
->misalign
, vect_body
);
2011 prologue_cost_vec
.release ();
2012 body_cost_vec
.release ();
2015 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
2016 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
2017 the first GROUP1_SIZE stmts, since stores are consecutive), the second
2018 containing the remainder.
2019 Return the first stmt in the second group. */
2022 vect_split_slp_store_group (gimple
*first_stmt
, unsigned group1_size
)
2024 stmt_vec_info first_vinfo
= vinfo_for_stmt (first_stmt
);
2025 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo
) == first_stmt
);
2026 gcc_assert (group1_size
> 0);
2027 int group2_size
= GROUP_SIZE (first_vinfo
) - group1_size
;
2028 gcc_assert (group2_size
> 0);
2029 GROUP_SIZE (first_vinfo
) = group1_size
;
2031 gimple
*stmt
= first_stmt
;
2032 for (unsigned i
= group1_size
; i
> 1; i
--)
2034 stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2035 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
2037 /* STMT is now the last element of the first group. */
2038 gimple
*group2
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2039 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)) = 0;
2041 GROUP_SIZE (vinfo_for_stmt (group2
)) = group2_size
;
2042 for (stmt
= group2
; stmt
; stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
)))
2044 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = group2
;
2045 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt
)) == 1);
2048 /* For the second group, the GROUP_GAP is that before the original group,
2049 plus skipping over the first vector. */
2050 GROUP_GAP (vinfo_for_stmt (group2
)) =
2051 GROUP_GAP (first_vinfo
) + group1_size
;
2053 /* GROUP_GAP of the first group now has to skip over the second group too. */
2054 GROUP_GAP (first_vinfo
) += group2_size
;
2056 if (dump_enabled_p ())
2057 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
2058 group1_size
, group2_size
);
2063 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2064 statements and a vector of NUNITS elements. */
2067 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
2069 return exact_div (common_multiple (nunits
, group_size
), group_size
);
2072 /* Analyze an SLP instance starting from a group of grouped stores. Call
2073 vect_build_slp_tree to build a tree of packed stmts if possible.
2074 Return FALSE if it's impossible to SLP any stmt in the loop. */
2077 vect_analyze_slp_instance (vec_info
*vinfo
,
2078 gimple
*stmt
, unsigned max_tree_size
)
2080 slp_instance new_instance
;
2082 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2083 tree vectype
, scalar_type
= NULL_TREE
;
2086 vec
<slp_tree
> loads
;
2087 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
2088 vec
<gimple
*> scalar_stmts
;
2090 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2094 scalar_type
= TREE_TYPE (DR_REF (dr
));
2095 vectype
= get_vectype_for_scalar_type (scalar_type
);
2099 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2100 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2103 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
2107 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2108 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
2109 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2114 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2117 "Build SLP failed: unsupported data-type ");
2118 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
2119 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2124 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2126 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2127 scalar_stmts
.create (group_size
);
2129 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2131 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2134 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
2135 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
2136 scalar_stmts
.safe_push (
2137 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
2139 scalar_stmts
.safe_push (next
);
2140 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2142 /* Mark the first element of the reduction chain as reduction to properly
2143 transform the node. In the reduction analysis phase only the last
2144 element of the chain is marked as reduction. */
2145 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
2146 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
2150 /* Collect reduction statements. */
2151 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2152 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
2153 scalar_stmts
.safe_push (next
);
2156 loads
.create (group_size
);
2158 /* Build the tree for the SLP instance. */
2159 bool *matches
= XALLOCAVEC (bool, group_size
);
2160 unsigned npermutes
= 0;
2161 bst_fail
= new scalar_stmts_set_t ();
2162 poly_uint64 max_nunits
= nunits
;
2163 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2164 &max_nunits
, &loads
, matches
, &npermutes
,
2165 NULL
, max_tree_size
);
2169 /* Calculate the unrolling factor based on the smallest type. */
2170 poly_uint64 unrolling_factor
2171 = calculate_unrolling_factor (max_nunits
, group_size
);
2173 if (maybe_ne (unrolling_factor
, 1U)
2174 && is_a
<bb_vec_info
> (vinfo
))
2176 unsigned HOST_WIDE_INT const_max_nunits
;
2177 if (!max_nunits
.is_constant (&const_max_nunits
)
2178 || const_max_nunits
> group_size
)
2180 if (dump_enabled_p ())
2181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2182 "Build SLP failed: store group "
2183 "size not a multiple of the vector size "
2184 "in basic block SLP\n");
2185 vect_free_slp_tree (node
);
2189 /* Fatal mismatch. */
2190 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2191 vect_free_slp_tree (node
);
2196 /* Create a new SLP instance. */
2197 new_instance
= XNEW (struct _slp_instance
);
2198 SLP_INSTANCE_TREE (new_instance
) = node
;
2199 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2200 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2201 SLP_INSTANCE_LOADS (new_instance
) = loads
;
2203 /* Compute the load permutation. */
2205 bool loads_permuted
= false;
2206 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2208 vec
<unsigned> load_permutation
;
2210 gimple
*load
, *first_stmt
;
2211 bool this_load_permuted
= false;
2212 load_permutation
.create (group_size
);
2213 first_stmt
= GROUP_FIRST_ELEMENT
2214 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2215 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
2217 int load_place
= vect_get_place_in_interleaving_chain
2219 gcc_assert (load_place
!= -1);
2220 if (load_place
!= j
)
2221 this_load_permuted
= true;
2222 load_permutation
.safe_push (load_place
);
2224 if (!this_load_permuted
2225 /* The load requires permutation when unrolling exposes
2226 a gap either because the group is larger than the SLP
2227 group-size or because there is a gap between the groups. */
2228 && (known_eq (unrolling_factor
, 1U)
2229 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
2230 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
2232 load_permutation
.release ();
2235 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2236 loads_permuted
= true;
2241 if (!vect_supported_load_permutation_p (new_instance
))
2243 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2246 "Build SLP failed: unsupported load "
2248 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
,
2251 vect_free_slp_instance (new_instance
);
2256 /* If the loads and stores can be handled with load/store-lan
2257 instructions do not generate this SLP instance. */
2258 if (is_a
<loop_vec_info
> (vinfo
)
2260 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2263 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
2265 gimple
*first_stmt
= GROUP_FIRST_ELEMENT
2266 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
2267 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (first_stmt
);
2268 /* Use SLP for strided accesses (or if we
2269 can't load-lanes). */
2270 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2271 || ! vect_load_lanes_supported
2272 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2273 GROUP_SIZE (stmt_vinfo
), false))
2276 if (i
== loads
.length ())
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2280 "Built SLP cancelled: can use "
2281 "load/store-lanes\n");
2282 vect_free_slp_instance (new_instance
);
2287 vinfo
->slp_instances
.safe_push (new_instance
);
2289 if (dump_enabled_p ())
2291 dump_printf_loc (MSG_NOTE
, vect_location
,
2292 "Final SLP tree for instance:\n");
2293 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2301 /* Failed to SLP. */
2302 /* Free the allocated memory. */
2303 scalar_stmts
.release ();
2307 /* For basic block SLP, try to break the group up into multiples of the
2309 unsigned HOST_WIDE_INT const_nunits
;
2310 if (is_a
<bb_vec_info
> (vinfo
)
2311 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2312 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
2313 && nunits
.is_constant (&const_nunits
))
2315 /* We consider breaking the group only on VF boundaries from the existing
2317 for (i
= 0; i
< group_size
; i
++)
2318 if (!matches
[i
]) break;
2320 if (i
>= const_nunits
&& i
< group_size
)
2322 /* Split into two groups at the first vector boundary before i. */
2323 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2324 unsigned group1_size
= i
& ~(const_nunits
- 1);
2326 gimple
*rest
= vect_split_slp_store_group (stmt
, group1_size
);
2327 bool res
= vect_analyze_slp_instance (vinfo
, stmt
, max_tree_size
);
2328 /* If the first non-match was in the middle of a vector,
2329 skip the rest of that vector. */
2330 if (group1_size
< i
)
2332 i
= group1_size
+ const_nunits
;
2334 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2337 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2340 /* Even though the first vector did not all match, we might be able to SLP
2341 (some) of the remainder. FORNOW ignore this possibility. */
2348 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2349 trees of packed scalar stmts if SLP is possible. */
2352 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2355 gimple
*first_element
;
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
2360 /* Find SLP sequences starting from groups of grouped stores. */
2361 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2362 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2364 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2366 if (loop_vinfo
->reduction_chains
.length () > 0)
2368 /* Find SLP sequences starting from reduction chains. */
2369 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2370 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2373 /* Dissolve reduction chain group. */
2374 gimple
*next
, *stmt
= first_element
;
2377 stmt_vec_info vinfo
= vinfo_for_stmt (stmt
);
2378 next
= GROUP_NEXT_ELEMENT (vinfo
);
2379 GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2380 GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2383 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element
))
2384 = vect_internal_def
;
2388 /* Find SLP sequences starting from groups of reductions. */
2389 if (loop_vinfo
->reductions
.length () > 1)
2390 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2398 /* For each possible SLP instance decide whether to SLP it and calculate overall
2399 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2400 least one instance. */
2403 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2406 poly_uint64 unrolling_factor
= 1;
2407 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2408 slp_instance instance
;
2409 int decided_to_slp
= 0;
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
2415 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2417 /* FORNOW: SLP if you can. */
2418 /* All unroll factors have the form current_vector_size * X for some
2419 rational X, so they must have a common multiple. */
2421 = force_common_multiple (unrolling_factor
,
2422 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2424 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2425 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2426 loop-based vectorization. Such stmts will be marked as HYBRID. */
2427 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2431 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2433 if (decided_to_slp
&& dump_enabled_p ())
2435 dump_printf_loc (MSG_NOTE
, vect_location
,
2436 "Decided to SLP %d instances. Unrolling factor ",
2438 dump_dec (MSG_NOTE
, unrolling_factor
);
2439 dump_printf (MSG_NOTE
, "\n");
2442 return (decided_to_slp
> 0);
2446 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2447 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2450 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2452 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2453 imm_use_iterator imm_iter
;
2455 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
2457 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2458 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2461 /* Propagate hybrid down the SLP tree. */
2462 if (stype
== hybrid
)
2464 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2468 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2469 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2470 /* If we get a pattern stmt here we have to use the LHS of the
2471 original stmt for immediate uses. */
2472 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo
)
2473 && STMT_VINFO_RELATED_STMT (stmt_vinfo
))
2474 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
2476 if (gimple_code (stmt
) == GIMPLE_PHI
)
2477 def
= gimple_phi_result (stmt
);
2479 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2481 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2483 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
2485 use_vinfo
= vinfo_for_stmt (use_stmt
);
2486 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
2487 && STMT_VINFO_RELATED_STMT (use_vinfo
))
2488 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
2489 if (!STMT_SLP_TYPE (use_vinfo
)
2490 && (STMT_VINFO_RELEVANT (use_vinfo
)
2491 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2492 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2493 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2495 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2498 "def in non-SLP stmt: ");
2499 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
2507 && !HYBRID_SLP_STMT (stmt_vinfo
))
2509 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2512 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2514 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2517 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2518 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
)
2519 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
2522 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2525 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2527 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2528 struct loop
*loopp
= (struct loop
*)wi
->info
;
2533 if (TREE_CODE (*tp
) == SSA_NAME
2534 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2536 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2537 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2538 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2540 if (dump_enabled_p ())
2542 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2543 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2545 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
2553 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2556 stmt_vec_info use_vinfo
= vinfo_for_stmt (gsi_stmt (*gsi
));
2557 /* If the stmt is in a SLP instance then this isn't a reason
2558 to mark use definitions in other SLP instances as hybrid. */
2559 if (! STMT_SLP_TYPE (use_vinfo
)
2560 && (STMT_VINFO_RELEVANT (use_vinfo
)
2561 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2562 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2563 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2570 /* Find stmts that must be both vectorized and SLPed. */
2573 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2576 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2577 slp_instance instance
;
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2583 /* First walk all pattern stmt in the loop and mark defs of uses as
2584 hybrid because immediate uses in them are not recorded. */
2585 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2587 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2588 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2591 gimple
*stmt
= gsi_stmt (gsi
);
2592 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2593 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2596 memset (&wi
, 0, sizeof (wi
));
2597 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2598 gimple_stmt_iterator gsi2
2599 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2600 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2601 vect_detect_hybrid_slp_1
, &wi
);
2602 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2603 vect_detect_hybrid_slp_2
,
2604 vect_detect_hybrid_slp_1
, &wi
);
2609 /* Then walk the SLP instance trees marking stmts with uses in
2610 non-SLP stmts as hybrid, also propagating hybrid down the
2611 SLP tree, collecting the above info on-the-fly. */
2612 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2614 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2615 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2621 /* Initialize a bb_vec_info struct for the statements between
2622 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2624 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2625 gimple_stmt_iterator region_end_in
)
2626 : vec_info (vec_info::bb
, init_cost (NULL
)),
2627 bb (gsi_bb (region_begin_in
)),
2628 region_begin (region_begin_in
),
2629 region_end (region_end_in
)
2631 gimple_stmt_iterator gsi
;
2633 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2636 gimple
*stmt
= gsi_stmt (gsi
);
2637 gimple_set_uid (stmt
, 0);
2638 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, this));
2645 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2646 stmts in the basic block. */
2648 _bb_vec_info::~_bb_vec_info ()
2650 for (gimple_stmt_iterator si
= region_begin
;
2651 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2653 gimple
*stmt
= gsi_stmt (si
);
2654 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2657 /* Free stmt_vec_info. */
2658 free_stmt_vec_info (stmt
);
2660 /* Reset region marker. */
2661 gimple_set_uid (stmt
, -1);
2668 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2669 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2671 Return true if the operations are supported. */
2674 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2675 slp_instance node_instance
)
2682 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2685 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2686 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
))
2689 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
2690 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2691 gcc_assert (stmt_info
);
2692 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2694 /* For BB vectorization vector types are assigned here.
2695 Memory accesses already got their vector type assigned
2696 in vect_analyze_data_refs. */
2697 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2699 && ! STMT_VINFO_DATA_REF (stmt_info
))
2701 gcc_assert (PURE_SLP_STMT (stmt_info
));
2703 tree scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
2704 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_NOTE
, vect_location
,
2707 "get vectype for scalar type: ");
2708 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
2709 dump_printf (MSG_NOTE
, "\n");
2712 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
2715 if (dump_enabled_p ())
2717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2718 "not SLPed: unsupported data-type ");
2719 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2721 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2726 if (dump_enabled_p ())
2728 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
2729 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
2730 dump_printf (MSG_NOTE
, "\n");
2734 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt
)
2735 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt
)) = vectype
;
2738 /* Calculate the number of vector statements to be created for the
2739 scalar stmts in this node. For SLP reductions it is equal to the
2740 number of vector statements in the children (which has already been
2741 calculated by the recursive call). Otherwise it is the number of
2742 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2743 VF divided by the number of elements in a vector. */
2744 if (GROUP_FIRST_ELEMENT (stmt_info
)
2745 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2746 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2747 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
2751 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2752 vf
= loop_vinfo
->vectorization_factor
;
2755 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2756 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2757 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2758 = vect_get_num_vectors (vf
* group_size
, vectype
);
2761 /* Push SLP node def-type to stmt operands. */
2762 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2763 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2764 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2765 = SLP_TREE_DEF_TYPE (child
);
2766 bool res
= vect_analyze_stmt (stmt
, &dummy
, node
, node_instance
);
2767 /* Restore def-types. */
2768 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2769 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2770 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child
)[0]))
2771 = vect_internal_def
;
2779 /* Analyze statements in SLP instances of VINFO. Return true if the
2780 operations are supported. */
2783 vect_slp_analyze_operations (vec_info
*vinfo
)
2785 slp_instance instance
;
2788 if (dump_enabled_p ())
2789 dump_printf_loc (MSG_NOTE
, vect_location
,
2790 "=== vect_slp_analyze_operations ===\n");
2792 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2794 if (!vect_slp_analyze_node_operations (vinfo
,
2795 SLP_INSTANCE_TREE (instance
),
2798 dump_printf_loc (MSG_NOTE
, vect_location
,
2799 "removing SLP instance operations starting from: ");
2800 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2801 SLP_TREE_SCALAR_STMTS
2802 (SLP_INSTANCE_TREE (instance
))[0], 0);
2803 vect_free_slp_instance (instance
);
2804 vinfo
->slp_instances
.ordered_remove (i
);
2808 /* Compute the costs of the SLP instance. */
2809 vect_analyze_slp_cost (instance
, vinfo
->target_cost_data
);
2814 return !vinfo
->slp_instances
.is_empty ();
2818 /* Compute the scalar cost of the SLP node NODE and its children
2819 and return it. Do not account defs that are marked in LIFE and
2820 update LIFE according to uses of NODE. */
2823 vect_bb_slp_scalar_cost (basic_block bb
,
2824 slp_tree node
, vec
<bool, va_heap
> *life
)
2826 unsigned scalar_cost
= 0;
2831 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2834 ssa_op_iter op_iter
;
2835 def_operand_p def_p
;
2836 stmt_vec_info stmt_info
;
2841 /* If there is a non-vectorized use of the defs then the scalar
2842 stmt is kept live in which case we do not account it or any
2843 required defs in the SLP children in the scalar cost. This
2844 way we make the vectorization more costly when compared to
2846 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2848 imm_use_iterator use_iter
;
2850 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2851 if (!is_gimple_debug (use_stmt
)
2852 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt
)->vinfo
,
2854 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt
))))
2857 BREAK_FROM_IMM_USE_STMT (use_iter
);
2863 /* Count scalar stmts only once. */
2864 if (gimple_visited_p (stmt
))
2866 gimple_set_visited (stmt
, true);
2868 stmt_info
= vinfo_for_stmt (stmt
);
2869 if (STMT_VINFO_DATA_REF (stmt_info
))
2871 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2872 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2874 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2877 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2879 scalar_cost
+= stmt_cost
;
2882 auto_vec
<bool, 20> subtree_life
;
2883 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2885 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2887 /* Do not directly pass LIFE to the recursive call, copy it to
2888 confine changes in the callee to the current child/subtree. */
2889 subtree_life
.safe_splice (*life
);
2890 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
);
2891 subtree_life
.truncate (0);
2898 /* Check if vectorization of the basic block is profitable. */
2901 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2903 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2904 slp_instance instance
;
2906 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2907 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2909 /* Calculate scalar cost. */
2910 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2912 auto_vec
<bool, 20> life
;
2913 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2914 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2915 SLP_INSTANCE_TREE (instance
),
2919 /* Unset visited flag. */
2920 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2921 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2922 gimple_set_visited (gsi_stmt (gsi
), false);
2924 /* Complete the target-specific cost calculation. */
2925 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2926 &vec_inside_cost
, &vec_epilogue_cost
);
2928 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2930 if (dump_enabled_p ())
2932 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2933 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2935 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2936 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2937 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2940 /* Vectorization is profitable if its cost is more than the cost of scalar
2941 version. Note that we err on the vector side for equal cost because
2942 the cost estimate is otherwise quite pessimistic (constant uses are
2943 free on the scalar side but cost a load on the vector side for
2945 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2951 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2952 if so and sets fatal to true if failure is independent of
2953 current_vector_size. */
2956 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin
,
2957 gimple_stmt_iterator region_end
,
2958 vec
<data_reference_p
> datarefs
, int n_stmts
,
2961 bb_vec_info bb_vinfo
;
2962 slp_instance instance
;
2964 poly_uint64 min_vf
= 2;
2966 /* The first group of checks is independent of the vector size. */
2969 if (n_stmts
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2971 if (dump_enabled_p ())
2972 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2973 "not vectorized: too many instructions in "
2975 free_data_refs (datarefs
);
2979 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
);
2983 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
2985 /* Analyze the data references. */
2987 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
))
2989 if (dump_enabled_p ())
2990 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2991 "not vectorized: unhandled data-ref in basic "
2998 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3000 if (dump_enabled_p ())
3001 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3002 "not vectorized: not enough data-refs in "
3009 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3011 if (dump_enabled_p ())
3012 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3013 "not vectorized: unhandled data access in "
3020 /* If there are no grouped stores in the region there is no need
3021 to continue with pattern recog as vect_analyze_slp will fail
3023 if (bb_vinfo
->grouped_stores
.is_empty ())
3025 if (dump_enabled_p ())
3026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3027 "not vectorized: no grouped stores in "
3034 /* While the rest of the analysis below depends on it in some way. */
3037 vect_pattern_recog (bb_vinfo
);
3039 /* Check the SLP opportunities in the basic block, analyze and build SLP
3041 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3043 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3046 "Failed to SLP the basic block.\n");
3047 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3048 "not vectorized: failed to find SLP opportunities "
3049 "in basic block.\n");
3056 vect_record_base_alignments (bb_vinfo
);
3058 /* Analyze and verify the alignment of data references and the
3059 dependence in the SLP instances. */
3060 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3062 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
3063 || ! vect_slp_analyze_instance_dependence (instance
))
3065 dump_printf_loc (MSG_NOTE
, vect_location
,
3066 "removing SLP instance operations starting from: ");
3067 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
3068 SLP_TREE_SCALAR_STMTS
3069 (SLP_INSTANCE_TREE (instance
))[0], 0);
3070 vect_free_slp_instance (instance
);
3071 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3075 /* Mark all the statements that we want to vectorize as pure SLP and
3077 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
3078 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3082 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3088 if (!vect_slp_analyze_operations (bb_vinfo
))
3090 if (dump_enabled_p ())
3091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3092 "not vectorized: bad operation in basic block.\n");
3098 /* Cost model: check if the vectorization is worthwhile. */
3099 if (!unlimited_cost_model (NULL
)
3100 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3102 if (dump_enabled_p ())
3103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3104 "not vectorized: vectorization is not "
3111 if (dump_enabled_p ())
3112 dump_printf_loc (MSG_NOTE
, vect_location
,
3113 "Basic block will be vectorized using SLP\n");
3119 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3120 true if anything in the basic-block was vectorized. */
3123 vect_slp_bb (basic_block bb
)
3125 bb_vec_info bb_vinfo
;
3126 gimple_stmt_iterator gsi
;
3127 bool any_vectorized
= false;
3128 auto_vector_sizes vector_sizes
;
3130 if (dump_enabled_p ())
3131 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
3133 /* Autodetect first vector size we try. */
3134 current_vector_size
= 0;
3135 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
);
3136 unsigned int next_size
= 0;
3138 gsi
= gsi_start_bb (bb
);
3140 poly_uint64 autodetected_vector_size
= 0;
3143 if (gsi_end_p (gsi
))
3146 gimple_stmt_iterator region_begin
= gsi
;
3147 vec
<data_reference_p
> datarefs
= vNULL
;
3150 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3152 gimple
*stmt
= gsi_stmt (gsi
);
3153 if (is_gimple_debug (stmt
))
3157 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3158 vect_location
= gimple_location (stmt
);
3160 if (!find_data_references_in_stmt (NULL
, stmt
, &datarefs
))
3164 /* Skip leading unhandled stmts. */
3165 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3171 gimple_stmt_iterator region_end
= gsi
;
3173 bool vectorized
= false;
3175 bb_vinfo
= vect_slp_analyze_bb_1 (region_begin
, region_end
,
3176 datarefs
, insns
, fatal
);
3178 && dbg_cnt (vect_slp
))
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3183 vect_schedule_slp (bb_vinfo
);
3185 if (dump_enabled_p ())
3186 dump_printf_loc (MSG_NOTE
, vect_location
,
3187 "basic block part vectorized\n");
3193 any_vectorized
|= vectorized
;
3196 autodetected_vector_size
= current_vector_size
;
3198 if (next_size
< vector_sizes
.length ()
3199 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3203 || next_size
== vector_sizes
.length ()
3204 || known_eq (current_vector_size
, 0U)
3205 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3206 vector sizes will fail do not bother iterating. */
3209 if (gsi_end_p (region_end
))
3212 /* Skip the unhandled stmt. */
3215 /* And reset vector sizes. */
3216 current_vector_size
= 0;
3221 /* Try the next biggest vector size. */
3222 current_vector_size
= vector_sizes
[next_size
++];
3223 if (dump_enabled_p ())
3225 dump_printf_loc (MSG_NOTE
, vect_location
,
3226 "***** Re-trying analysis with "
3228 dump_dec (MSG_NOTE
, current_vector_size
);
3229 dump_printf (MSG_NOTE
, "\n");
3237 return any_vectorized
;
3241 /* Return 1 if vector type of boolean constant which is OPNUM
3242 operand in statement STMT is a boolean vector. */
3245 vect_mask_constant_operand_p (gimple
*stmt
, int opnum
)
3247 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3248 enum tree_code code
= gimple_expr_code (stmt
);
3251 enum vect_def_type dt
;
3253 /* For comparison and COND_EXPR type is chosen depending
3254 on the other comparison operand. */
3255 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3258 op
= gimple_assign_rhs1 (stmt
);
3260 op
= gimple_assign_rhs2 (stmt
);
3262 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3266 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3269 if (code
== COND_EXPR
)
3271 tree cond
= gimple_assign_rhs1 (stmt
);
3273 if (TREE_CODE (cond
) == SSA_NAME
)
3276 op
= TREE_OPERAND (cond
, 1);
3278 op
= TREE_OPERAND (cond
, 0);
3280 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &def_stmt
,
3284 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3287 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3290 /* Build a variable-length vector in which the elements in ELTS are repeated
3291 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3292 RESULTS and add any new instructions to SEQ.
3294 The approach we use is:
3296 (1) Find a vector mode VM with integer elements of mode IM.
3298 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3299 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3300 from small vectors to IM.
3302 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3304 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3305 correct byte contents.
3307 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3309 We try to find the largest IM for which this sequence works, in order
3310 to cut down on the number of interleaves. */
3313 duplicate_and_interleave (gimple_seq
*seq
, tree vector_type
, vec
<tree
> elts
,
3314 unsigned int nresults
, vec
<tree
> &results
)
3316 unsigned int nelts
= elts
.length ();
3317 tree element_type
= TREE_TYPE (vector_type
);
3319 /* (1) Find a vector mode VM with integer elements of mode IM. */
3320 unsigned int nvectors
= 1;
3321 tree new_vector_type
;
3323 if (!can_duplicate_and_interleave_p (nelts
, TYPE_MODE (element_type
),
3324 &nvectors
, &new_vector_type
,
3328 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3329 unsigned int partial_nelts
= nelts
/ nvectors
;
3330 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3332 tree_vector_builder partial_elts
;
3333 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3334 pieces
.quick_grow (nvectors
* 2);
3335 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3337 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3338 ELTS' has mode IM. */
3339 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3340 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3341 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3342 tree t
= gimple_build_vector (seq
, &partial_elts
);
3343 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3344 TREE_TYPE (new_vector_type
), t
);
3346 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3347 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3350 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3351 correct byte contents.
3353 We need to repeat the following operation log2(nvectors) times:
3355 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3356 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3358 However, if each input repeats every N elements and the VF is
3359 a multiple of N * 2, the HI result is the same as the LO. */
3360 unsigned int in_start
= 0;
3361 unsigned int out_start
= nvectors
;
3362 unsigned int hi_start
= nvectors
/ 2;
3363 /* A bound on the number of outputs needed to produce NRESULTS results
3364 in the final iteration. */
3365 unsigned int noutputs_bound
= nvectors
* nresults
;
3366 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3368 noutputs_bound
/= 2;
3369 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3370 for (unsigned int i
= 0; i
< limit
; ++i
)
3373 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3376 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3380 tree output
= make_ssa_name (new_vector_type
);
3381 tree input1
= pieces
[in_start
+ (i
/ 2)];
3382 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3383 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3386 gimple_seq_add_stmt (seq
, stmt
);
3387 pieces
[out_start
+ i
] = output
;
3389 std::swap (in_start
, out_start
);
3392 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3393 results
.reserve (nresults
);
3394 for (unsigned int i
= 0; i
< nresults
; ++i
)
3396 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3397 pieces
[in_start
+ i
]));
3399 results
.quick_push (results
[i
- nvectors
]);
3403 /* For constant and loop invariant defs of SLP_NODE this function returns
3404 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3405 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3406 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3407 REDUC_INDEX is the index of the reduction operand in the statements, unless
/* NOTE(review): this region looks like a line-mangled extraction of a C++
   function: statements are split across physical lines and the embedded
   original line numbers (3411, 3412, ...) jump, so interior lines appear
   to be missing.  Code is left byte-identical; comments only.  */
/* vect_get_constant_vectors: from the visible logic, gathers the scalar
   operand OP_NUM of each stmt in SLP_NODE, packs the values into vectors
   (via gimple_build_vector or duplicate_and_interleave) and pushes the
   resulting defs onto *VEC_OPRNDS -- TODO confirm against the full file.  */
3411 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
3412 vec
<tree
> *vec_oprnds
,
3413 unsigned int op_num
, unsigned int number_of_vectors
)
3415 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3416 gimple
*stmt
= stmts
[0];
3417 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
3418 unsigned HOST_WIDE_INT nunits
;
3420 unsigned j
, number_of_places_left_in_vector
;
3423 int group_size
= stmts
.length ();
3424 unsigned int vec_num
, i
;
3425 unsigned number_of_copies
= 1;
3427 voprnds
.create (number_of_vectors
);
3428 bool constant_p
, is_store
;
3429 tree neutral_op
= NULL
;
3430 enum tree_code code
= gimple_expr_code (stmt
);
3431 gimple_seq ctor_seq
= NULL
;
3432 auto_vec
<tree
, 16> permute_results
;
3434 /* Check if vector type is a boolean vector. */
3435 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3436 && vect_mask_constant_operand_p (stmt
, op_num
))
3438 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo
));
3440 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
3442 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
3445 op
= gimple_assign_rhs1 (stmt
);
3452 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3453 created vectors. It is greater than 1 if unrolling is performed.
3455 For example, we have two scalar operands, s1 and s2 (e.g., group of
3456 strided accesses of size two), while NUNITS is four (i.e., four scalars
3457 of this type can be packed in a vector). The output vector will contain
3458 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3461 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3462 containing the operands.
3464 For example, NUNITS is four as before, and the group size is 8
3465 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3466 {s5, s6, s7, s8}. */
3468 /* When using duplicate_and_interleave, we just need one element for
3469 each scalar statement. */
3470 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3471 nunits
= group_size
;
3473 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3475 number_of_places_left_in_vector
= nunits
;
3477 tree_vector_builder
elts (vector_type
, nunits
, 1);
3478 elts
.quick_grow (nunits
);
3479 bool place_after_defs
= false;
/* Fill NUMBER_OF_COPIES copies of each scalar operand; the inner loop
   walks the group in reverse (i starts at group_size - 1 and decrements),
   and elements are stored at decreasing slot indices.  */
3480 for (j
= 0; j
< number_of_copies
; j
++)
3482 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
3485 op
= gimple_assign_rhs1 (stmt
);
3492 tree cond
= gimple_assign_rhs1 (stmt
);
3493 if (TREE_CODE (cond
) == SSA_NAME
)
3494 op
= gimple_op (stmt
, op_num
+ 1);
3495 else if (op_num
== 0 || op_num
== 1)
3496 op
= TREE_OPERAND (cond
, op_num
);
3500 op
= gimple_assign_rhs2 (stmt
);
3502 op
= gimple_assign_rhs3 (stmt
);
3508 op
= gimple_call_arg (stmt
, op_num
);
3515 op
= gimple_op (stmt
, op_num
+ 1);
3516 /* Unlike the other binary operators, shifts/rotates have
3517 the shift count being int, instead of the same type as
3518 the lhs, so make sure the scalar is the right type if
3519 we are dealing with vectors of
3520 long long/long/short/char. */
3521 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
3522 op
= fold_convert (TREE_TYPE (vector_type
), op
);
3526 op
= gimple_op (stmt
, op_num
+ 1);
3531 /* Create 'vect_ = {op0,op1,...,opn}'. */
3532 number_of_places_left_in_vector
--;
3534 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3536 if (CONSTANT_CLASS_P (op
))
3538 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3540 /* Can't use VIEW_CONVERT_EXPR for booleans because
3541 of possibly different sizes of scalar value and
3543 if (integer_zerop (op
))
3544 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3545 else if (integer_onep (op
))
3546 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3551 op
= fold_unary (VIEW_CONVERT_EXPR
,
3552 TREE_TYPE (vector_type
), op
);
3553 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3557 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3559 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3562 = build_all_ones_cst (TREE_TYPE (vector_type
));
3564 = build_zero_cst (TREE_TYPE (vector_type
));
3565 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3566 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3572 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3575 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3578 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3582 elts
[number_of_places_left_in_vector
] = op
;
3583 if (!CONSTANT_CLASS_P (op
))
3585 if (TREE_CODE (orig_op
) == SSA_NAME
3586 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3587 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3588 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3589 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3590 place_after_defs
= true;
3592 if (number_of_places_left_in_vector
== 0)
3595 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3596 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3597 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3600 if (vec_oprnds
->is_empty ())
3601 duplicate_and_interleave (&ctor_seq
, vector_type
, elts
,
3604 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3607 gimple_stmt_iterator gsi
;
3608 if (place_after_defs
)
3611 (vect_find_last_scalar_stmt_in_slp (slp_node
));
3612 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
3615 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
3616 if (ctor_seq
!= NULL
)
3618 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3619 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3622 voprnds
.quick_push (init
);
3623 place_after_defs
= false;
3624 number_of_places_left_in_vector
= nunits
;
3626 elts
.new_vector (vector_type
, nunits
, 1);
3627 elts
.quick_grow (nunits
);
3632 /* Since the vectors are created in the reverse order, we should invert
3634 vec_num
= voprnds
.length ();
3635 for (j
= vec_num
; j
!= 0; j
--)
3637 vop
= voprnds
[j
- 1];
3638 vec_oprnds
->quick_push (vop
);
3643 /* In case that VF is greater than the unrolling factor needed for the SLP
3644 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3645 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3646 to replicate the vectors. */
/* Pad out to NUMBER_OF_VECTORS by pushing a neutral-value vector when one
   is available, otherwise (below) by recycling already-created defs.  */
3647 while (number_of_vectors
> vec_oprnds
->length ())
3649 tree neutral_vec
= NULL
;
3654 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3656 vec_oprnds
->quick_push (neutral_vec
);
3660 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3661 vec_oprnds
->quick_push (vop
);
3667 /* Get vectorized definitions from SLP_NODE that contains corresponding
3668 vectorized def-stmts. */
/* NOTE(review): line-mangled extraction; original line numbers jump
   (3672-3673, 3675-3676, ... missing), so declarations/braces are absent.
   Code kept byte-identical; comments only.  */
/* vect_get_slp_vect_defs: for each vectorized stmt of SLP_NODE, pushes
   its definition -- the PHI result for GIMPLE_PHIs, otherwise the
   assignment LHS -- onto *VEC_OPRNDS.  */
3671 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3674 gimple
*vec_def_stmt
;
3677 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3679 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
3681 gcc_assert (vec_def_stmt
);
3682 if (gimple_code (vec_def_stmt
) == GIMPLE_PHI
)
3683 vec_oprnd
= gimple_phi_result (vec_def_stmt
);
3685 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
3686 vec_oprnds
->quick_push (vec_oprnd
);
3691 /* Get vectorized definitions for SLP_NODE.
3692 If the scalar definitions are loop invariants or constants, collect them and
3693 call vect_get_constant_vectors() to create vector stmts.
3694 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3695 must be stored in the corresponding child of SLP_NODE, and we call
3696 vect_get_slp_vect_defs () to retrieve them. */
/* NOTE(review): line-mangled extraction with dropped interior lines
   (original numbering jumps); code kept byte-identical, comments only.
   Visible logic: for each operand in OPS, either reuse the vectorized
   defs of the matching SLP child (vect_get_slp_vect_defs) or build
   constant/invariant vectors (vect_get_constant_vectors), pushing each
   operand's vector of defs onto *VEC_OPRNDS.  */
3699 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
3700 vec
<vec
<tree
> > *vec_oprnds
)
3703 int number_of_vects
= 0, i
;
3704 unsigned int child_index
= 0;
3705 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
3706 slp_tree child
= NULL
;
3709 bool vectorized_defs
;
3711 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3712 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
3714 /* For each operand we check if it has vectorized definitions in a child
3715 node or we need to create them (for invariants and constants). We
3716 check if the LHS of the first stmt of the next child matches OPRND.
3717 If it does, we found the correct child. Otherwise, we call
3718 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3719 to check this child node for the next operand. */
3720 vectorized_defs
= false;
3721 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
3723 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
3725 /* We have to check both pattern and original def, if available. */
3726 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3728 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
3730 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
3733 if (gimple_code (first_def
) == GIMPLE_PHI
)
3734 first_def_op
= gimple_phi_result (first_def
);
3736 first_def_op
= gimple_get_lhs (first_def
);
3737 if (operand_equal_p (oprnd
, first_def_op
, 0)
3739 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
3741 /* The number of vector defs is determined by the number of
3742 vector statements in the node from which we get those
3744 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
3745 vectorized_defs
= true;
3753 if (!vectorized_defs
)
3757 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3758 /* Number of vector stmts was calculated according to LHS in
3759 vect_schedule_slp_instance (), fix it by replacing LHS with
3760 RHS, if necessary. See vect_get_smallest_scalar_type () for
3762 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
3764 if (rhs_size_unit
!= lhs_size_unit
)
3766 number_of_vects
*= rhs_size_unit
;
3767 number_of_vects
/= lhs_size_unit
;
3772 /* Allocate memory for vectorized defs. */
3774 vec_defs
.create (number_of_vects
);
3776 /* For reduction defs we call vect_get_constant_vectors (), since we are
3777 looking for initial loop invariant values. */
3778 if (vectorized_defs
)
3779 /* The defs are already vectorized. */
3780 vect_get_slp_vect_defs (child
, &vec_defs
);
3782 /* Build vectors from scalar defs. */
3783 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
3786 vec_oprnds
->quick_push (vec_defs
);
3790 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3791 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3792 permute statements for the SLP node NODE of the SLP instance
3793 SLP_NODE_INSTANCE. */
/* NOTE(review): line-mangled extraction; interior lines are missing
   (original numbering jumps) and at least one comment is left
   unterminated by the damage.  Code kept byte-identical; comments only.
   Visible logic: builds per-element VEC_PERM_EXPR masks for the loads in
   DR_CHAIN, bails out (constant-length vectors/VF required) when the
   subparts or VF are not constant, checks each mask with
   can_vec_perm_const_p, and unless ANALYZE_ONLY emits the permute stmts
   into SLP_TREE_VEC_STMTS (node).  */
3796 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3797 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3798 slp_instance slp_node_instance
, bool analyze_only
,
3801 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3802 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3803 tree mask_element_type
= NULL_TREE
, mask_type
;
3805 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3806 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3807 unsigned int mask_element
;
3809 unsigned HOST_WIDE_INT nunits
, const_vf
;
3811 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3814 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3816 mode
= TYPE_MODE (vectype
);
3818 /* At the moment, all permutations are represented using per-element
3819 indices, so we can't cope with variable vector lengths or
3820 vectorization factors. */
3821 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
3822 || !vf
.is_constant (&const_vf
))
3825 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3826 same size as the vector element being permuted. */
3827 mask_element_type
= lang_hooks
.types
.type_for_mode
3828 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))).require (), 1);
3829 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3830 vec_perm_builder
mask (nunits
, nunits
, 1);
3831 mask
.quick_grow (nunits
);
3832 vec_perm_indices indices
;
3834 /* Initialize the vect stmts of NODE to properly insert the generated
3837 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3838 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3839 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3841 /* Generate permutation masks for every NODE. Number of masks for each NODE
3842 is equal to GROUP_SIZE.
3843 E.g., we have a group of three nodes with three loads from the same
3844 location in each node, and the vector size is 4. I.e., we have a
3845 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3846 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3847 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3850 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3851 The last mask is illegal since we assume two operands for permute
3852 operation, and the mask element values can't be outside that range.
3853 Hence, the last mask must be converted into {2,5,5,5}.
3854 For the first two permutations we need the first and the second input
3855 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3856 we need the second and the third vectors: {b1,c1,a2,b2} and
3859 int vect_stmts_counter
= 0;
3860 unsigned int index
= 0;
3861 int first_vec_index
= -1;
3862 int second_vec_index
= -1;
3866 for (unsigned int j
= 0; j
< const_vf
; j
++)
3868 for (int k
= 0; k
< group_size
; k
++)
3870 unsigned int i
= (SLP_TREE_LOAD_PERMUTATION (node
)[k
]
3871 + j
* STMT_VINFO_GROUP_SIZE (stmt_info
));
3872 vec_index
= i
/ nunits
;
3873 mask_element
= i
% nunits
;
3874 if (vec_index
== first_vec_index
3875 || first_vec_index
== -1)
3877 first_vec_index
= vec_index
;
3879 else if (vec_index
== second_vec_index
3880 || second_vec_index
== -1)
3882 second_vec_index
= vec_index
;
3883 mask_element
+= nunits
;
3887 if (dump_enabled_p ())
3889 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3890 "permutation requires at "
3891 "least three vectors ");
3892 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3895 gcc_assert (analyze_only
);
3899 gcc_assert (mask_element
< 2 * nunits
);
3900 if (mask_element
!= index
)
3902 mask
[index
++] = mask_element
;
3904 if (index
== nunits
&& !noop_p
)
3906 indices
.new_vector (mask
, 2, nunits
);
3907 if (!can_vec_perm_const_p (mode
, indices
))
3909 if (dump_enabled_p ())
3911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3913 "unsupported vect permute { ");
3914 for (i
= 0; i
< nunits
; ++i
)
3916 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3917 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3919 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3921 gcc_assert (analyze_only
);
3928 if (index
== nunits
)
3932 tree mask_vec
= NULL_TREE
;
3935 mask_vec
= vec_perm_indices_to_tree (mask_type
, indices
);
3937 if (second_vec_index
== -1)
3938 second_vec_index
= first_vec_index
;
3940 /* Generate the permute statement if necessary. */
3941 tree first_vec
= dr_chain
[first_vec_index
];
3942 tree second_vec
= dr_chain
[second_vec_index
];
3947 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3949 perm_dest
= make_ssa_name (perm_dest
);
3950 perm_stmt
= gimple_build_assign (perm_dest
,
3952 first_vec
, second_vec
,
3954 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3957 /* If mask was NULL_TREE generate the requested
3958 identity transform. */
3959 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
3961 /* Store the vector statement in NODE. */
3962 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
3966 first_vec_index
= -1;
3967 second_vec_index
= -1;
/* Maps a vector of scalar stmts to the slp_tree that vectorizes them;
   used by vect_schedule_slp_instance below to reuse already-vectorized
   nodes (see the bst_map->get_or_insert call).  NOTE(review): extraction
   has mangled line breaks; tokens kept byte-identical.  */
3976 typedef hash_map
<vec
<gimple
*>, slp_tree
,
3977 simple_hashmap_traits
<bst_traits
, slp_tree
> >
3978 scalar_stmts_to_slp_tree_map_t
;
3980 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): line-mangled extraction with dropped interior lines
   (original numbering jumps); code kept byte-identical, comments only.
   Visible logic: recurses into the node's children first (postorder),
   reuses vec stmts for an identical scalar-stmt set via BST_MAP,
   temporarily pushes child def-types onto the child stmts, handles
   SLP_TREE_TWO_OPERATORS nodes by vectorizing with both codes and
   blending the results with a VEC_PERM_EXPR, then transforms the stmt
   and restores the child stmts' def-types.  */
3983 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3984 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3987 bool grouped_store
, is_store
;
3988 gimple_stmt_iterator si
;
3989 stmt_vec_info stmt_info
;
3990 unsigned int group_size
;
3995 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3998 /* See if we have already vectorized the same set of stmts and reuse their
3999 vectorized stmts. */
4001 = bst_map
->get_or_insert (SLP_TREE_SCALAR_STMTS (node
).copy ());
4004 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (leader
));
4009 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4010 vect_schedule_slp_instance (child
, instance
, bst_map
);
4012 /* Push SLP node def-type to stmts. */
4013 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4014 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4015 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
4016 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = SLP_TREE_DEF_TYPE (child
);
4018 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
4019 stmt_info
= vinfo_for_stmt (stmt
);
4021 /* VECTYPE is the type of the destination. */
4022 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4023 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4024 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
4026 if (!SLP_TREE_VEC_STMTS (node
).exists ())
4027 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4029 if (dump_enabled_p ())
4031 dump_printf_loc (MSG_NOTE
,vect_location
,
4032 "------>vectorizing SLP node starting from: ");
4033 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
4036 /* Vectorized stmts go before the last scalar stmt which is where
4037 all uses are ready. */
4038 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
4040 /* Mark the first element of the reduction chain as reduction to properly
4041 transform the node. In the analysis phase only the last element of the
4042 chain is marked as reduction. */
4043 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
4044 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
4046 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
4047 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
4050 /* Handle two-operation SLP nodes by vectorizing the group with
4051 both operations and then performing a merge. */
4052 if (SLP_TREE_TWO_OPERATORS (node
))
4054 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
4055 enum tree_code ocode
= ERROR_MARK
;
4057 vec_perm_builder
mask (group_size
, group_size
, 1);
4058 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
4059 if (gimple_assign_rhs_code (ostmt
) != code0
)
4061 mask
.quick_push (1);
4062 ocode
= gimple_assign_rhs_code (ostmt
);
4065 mask
.quick_push (0);
4066 if (ocode
!= ERROR_MARK
)
4071 tree tmask
= NULL_TREE
;
4072 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
4073 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4074 SLP_TREE_VEC_STMTS (node
).truncate (0);
4075 gimple_assign_set_rhs_code (stmt
, ocode
);
4076 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
4077 gimple_assign_set_rhs_code (stmt
, code0
);
4078 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4079 SLP_TREE_VEC_STMTS (node
).truncate (0);
4080 tree meltype
= build_nonstandard_integer_type
4081 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4082 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4084 for (j
= 0; j
< v0
.length (); ++j
)
4086 /* Enforced by vect_build_slp_tree, which rejects variable-length
4087 vectors for SLP_TREE_TWO_OPERATORS. */
4088 unsigned int const_nunits
= nunits
.to_constant ();
4089 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4090 for (l
= 0; l
< const_nunits
; ++l
)
4092 if (k
>= group_size
)
4094 tree t
= build_int_cst (meltype
,
4095 mask
[k
++] * const_nunits
+ l
);
4096 melts
.quick_push (t
);
4098 tmask
= melts
.build ();
4100 /* ??? Not all targets support a VEC_PERM_EXPR with a
4101 constant mask that would translate to a vec_merge RTX
4102 (with their vec_perm_const_ok). We can either not
4103 vectorize in that case or let veclower do its job.
4104 Unfortunately that isn't too great and at least for
4105 plus/minus we'd eventually like to match targets
4106 vector addsub instructions. */
4108 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4110 gimple_assign_lhs (v0
[j
]),
4111 gimple_assign_lhs (v1
[j
]), tmask
);
4112 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
4113 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
4120 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
4122 /* Restore stmt def-types. */
4123 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4124 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4125 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, stmt
)
4126 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_internal_def
;
4131 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4132 For loop vectorization this is done in vectorizable_call, but for SLP
4133 it needs to be deferred until end of vect_schedule_slp, because multiple
4134 SLP instances may refer to the same scalar stmt. */
/* NOTE(review): line-mangled extraction with dropped interior lines;
   code kept byte-identical, comments only.  Visible logic: recurses over
   the SLP children, then for every pure-SLP, non-pattern scalar call
   stmt still in the IL replaces it with 'lhs = 0' -- transferring the
   stmt_vec_info to the new stmt and fixing up SSA_NAME_DEF_STMT.  */
4137 vect_remove_slp_scalar_calls (slp_tree node
)
4139 gimple
*stmt
, *new_stmt
;
4140 gimple_stmt_iterator gsi
;
4144 stmt_vec_info stmt_info
;
4146 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4149 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4150 vect_remove_slp_scalar_calls (child
);
4152 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
4154 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
4156 stmt_info
= vinfo_for_stmt (stmt
);
4157 if (stmt_info
== NULL
4158 || is_pattern_stmt_p (stmt_info
)
4159 || !PURE_SLP_STMT (stmt_info
))
4161 lhs
= gimple_call_lhs (stmt
);
4162 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4163 set_vinfo_for_stmt (new_stmt
, stmt_info
);
4164 set_vinfo_for_stmt (stmt
, NULL
);
4165 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
4166 gsi
= gsi_for_stmt (stmt
);
4167 gsi_replace (&gsi
, new_stmt
, false);
4168 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4172 /* Generate vector code for all SLP instances in the loop/basic block. */
4175 vect_schedule_slp (vec_info
*vinfo
)
4177 vec
<slp_instance
> slp_instances
;
4178 slp_instance instance
;
4180 bool is_store
= false;
4182 slp_instances
= vinfo
->slp_instances
;
4183 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4185 /* Schedule the tree of INSTANCE. */
4186 scalar_stmts_to_slp_tree_map_t
*bst_map
4187 = new scalar_stmts_to_slp_tree_map_t ();
4188 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4191 if (dump_enabled_p ())
4192 dump_printf_loc (MSG_NOTE
, vect_location
,
4193 "vectorizing stmts using SLP.\n");
4196 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4198 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4201 gimple_stmt_iterator gsi
;
4203 /* Remove scalar call stmts. Do not do this for basic-block
4204 vectorization as not all uses may be vectorized.
4205 ??? Why should this be necessary? DCE should be able to
4206 remove the stmts itself.
4207 ??? For BB vectorization we can as well remove scalar
4208 stmts starting from the SLP tree root if they have no
4210 if (is_a
<loop_vec_info
> (vinfo
))
4211 vect_remove_slp_scalar_calls (root
);
4213 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
4214 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4216 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
4219 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
4220 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
4221 /* Free the attached stmt_vec_info and remove the stmt. */
4222 gsi
= gsi_for_stmt (store
);
4223 unlink_stmt_vdef (store
);
4224 gsi_remove (&gsi
, true);
4225 release_defs (store
);
4226 free_stmt_vec_info (store
);