/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: cannot swap operands of "
			     "shared stmt %G", stmt_info->stmt);
	  return -1;
	}

      if (first_op_cond)
	{
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	{
	  unsigned int op = commutative_op + first_op_idx;
	  swap_ssa_operands (stmt_info->stmt,
			     gimple_op_ptr (stmt_info->stmt, op),
			     gimple_op_ptr (stmt_info->stmt, op + 1));
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if ((gimple_call_internal_p (call_stmt)
	       && (!vectorizable_internal_fn_p
		   (gimple_call_internal_fn (call_stmt))))
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different shift "
				 "arguments in %G", stmt);
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G", first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	(*leader)->refcnt++;
      return *leader;
    }
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					matches, npermutes, tree_size,
					max_tree_size, bst_map);
  /* Keep a reference for the bst_map use.  */
  if (res)
    res->refcnt++;
  bst_map->put (stmts.copy (), res);
  return res;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (other_info)
		  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
		continue;
	      if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
		return NULL;
	    }
	}
      else
	return NULL;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
			     vect_location,
			     "Build SLP failed: SLP tree too large\n");
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child, false);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					matches, npermutes,
					&this_tree_size,
					max_tree_size, bst_map)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Building parent vector operands from "
				     "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (stmt_info))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					     vect_location,
					     "Build SLP failed: cannot swap "
					     "operands of shared stmt %G",
					     stmts[j]->stmt);
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	      }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    tem, npermutes,
					    &this_tree_size,
					    max_tree_size, bst_map)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
	      for (j = 0; j < group_size; ++j)
		if (matches[j] == !swap_not_matching)
		  {
		    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
		    /* Avoid swapping operands twice.  */
		    if (gimple_plf (stmt, GF_PLF_1))
		      continue;
		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
				       gimple_assign_rhs2_ptr (stmt));
		    gimple_set_plf (stmt, GF_PLF_1, true);
		  }
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
			      == (matches[j] == !swap_not_matching));

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      if (dump_enabled_p ())
			dump_printf_loc (MSG_NOTE, vect_location,
					 "Building parent vector operands from "
					 "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node, hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "", node);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child, visited);
}

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_print_slp_tree (dump_kind, loc, node, visited);
}
/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation,
			  hash_set<slp_tree> &visited)
{
  stmt_vec_info stmt_info;
  vec<stmt_vec_info> tmp_stmts;
  unsigned int i;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation, visited);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    tmp_stmts[permutation[i]] = stmt_info;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  hash_set<slp_tree> visited;
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation, visited);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
	      && DR_GROUP_GAP (first_stmt_info) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node,
		       hash_set<slp_tree> &visited)
{
  if (visited.add (node))
    return;

  if (SLP_TREE_CHILDREN (node).length () == 0)
    {
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
	  && STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	SLP_INSTANCE_LOADS (inst).safe_push (node);
    }
  else
    {
      unsigned i;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
	vect_gather_slp_loads (inst, child, visited);
    }
}

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (inst, node, visited);
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (stmt_info)
      && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (stmt_info))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  stmt_vec_info next_load_info = NULL;
	  stmt_vec_info load_info;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      if (j != 0
		  && (next_load_info != load_info
		      || DR_GROUP_GAP (load_info) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "BB vectorization with gaps at the end of "
				     "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				     vect_location,
				     "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (stmt_info)));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last store in SLP INSTANCE.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */

static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
1886 /* Analyze an SLP instance starting from a group of grouped stores. Call
1887 vect_build_slp_tree to build a tree of packed stmts if possible.
1888 Return FALSE if it's impossible to SLP any stmt in the loop. */
1891 vect_analyze_slp_instance (vec_info
*vinfo
,
1892 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1894 slp_instance new_instance
;
1896 unsigned int group_size
;
1897 tree vectype
, scalar_type
= NULL_TREE
;
1899 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1900 vec
<stmt_vec_info
> scalar_stmts
;
1902 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1904 scalar_type
= TREE_TYPE (DR_REF (dr
));
1905 vectype
= get_vectype_for_scalar_type (scalar_type
);
1906 group_size
= DR_GROUP_SIZE (stmt_info
);
1908 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1910 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1911 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1912 group_size
= REDUC_GROUP_SIZE (stmt_info
);
1916 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1917 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1918 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1923 if (dump_enabled_p ())
1924 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1925 "Build SLP failed: unsupported data-type %T\n",
1930 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1932 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1933 scalar_stmts
.create (group_size
);
1934 stmt_vec_info next_info
= stmt_info
;
1935 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1937 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1940 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
1941 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
1944 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1946 /* Collect the reduction stmts and store them in
1947 SLP_TREE_SCALAR_STMTS. */
1950 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
1951 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
1953 /* Mark the first element of the reduction chain as reduction to properly
1954 transform the node. In the reduction analysis phase only the last
1955 element of the chain is marked as reduction. */
1956 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
1957 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
1958 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
1962 /* Collect reduction statements. */
1963 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1964 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
1965 scalar_stmts
.safe_push (next_info
);
1968 /* Build the tree for the SLP instance. */
1969 bool *matches
= XALLOCAVEC (bool, group_size
);
1970 unsigned npermutes
= 0;
1971 scalar_stmts_to_slp_tree_map_t
*bst_map
1972 = new scalar_stmts_to_slp_tree_map_t ();
1973 poly_uint64 max_nunits
= nunits
;
1974 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
1975 &max_nunits
, matches
, &npermutes
,
1976 NULL
, max_tree_size
, bst_map
);
1977 /* The map keeps a reference on SLP nodes built, release that. */
1978 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
1979 it
!= bst_map
->end (); ++it
)
1981 vect_free_slp_tree ((*it
).second
, false);
1985 /* Calculate the unrolling factor based on the smallest type. */
1986 poly_uint64 unrolling_factor
1987 = calculate_unrolling_factor (max_nunits
, group_size
);
1989 if (maybe_ne (unrolling_factor
, 1U)
1990 && is_a
<bb_vec_info
> (vinfo
))
1992 unsigned HOST_WIDE_INT const_max_nunits
;
1993 if (!max_nunits
.is_constant (&const_max_nunits
)
1994 || const_max_nunits
> group_size
)
1996 if (dump_enabled_p ())
1997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1998 "Build SLP failed: store group "
1999 "size not a multiple of the vector size "
2000 "in basic block SLP\n");
2001 vect_free_slp_tree (node
, false);
2004 /* Fatal mismatch. */
2005 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2006 vect_free_slp_tree (node
, false);
      else
        {
          /* Create a new SLP instance.  */
          new_instance = XNEW (struct _slp_instance);
          SLP_INSTANCE_TREE (new_instance) = node;
          SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
          SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
          SLP_INSTANCE_LOADS (new_instance) = vNULL;
          vect_gather_slp_loads (new_instance, node);

          /* Compute the load permutation.  */
          slp_tree load_node;
          bool loads_permuted = false;
          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
            {
              vec<unsigned> load_permutation;
              int j;
              stmt_vec_info load_info;
              bool this_load_permuted = false;
              load_permutation.create (group_size);
              stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
                (SLP_TREE_SCALAR_STMTS (load_node)[0]);
              FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
                {
                  int load_place = vect_get_place_in_interleaving_chain
                    (load_info, first_stmt_info);
                  gcc_assert (load_place != -1);
                  if (load_place != j)
                    this_load_permuted = true;
                  load_permutation.safe_push (load_place);
                }
              if (!this_load_permuted
                  /* The load requires permutation when unrolling exposes
                     a gap either because the group is larger than the SLP
                     group-size or because there is a gap between the groups.  */
                  && (known_eq (unrolling_factor, 1U)
                      || (group_size == DR_GROUP_SIZE (first_stmt_info)
                          && DR_GROUP_GAP (first_stmt_info) == 0)))
                {
                  load_permutation.release ();
                  continue;
                }
              SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
              loads_permuted = true;
            }
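          /* A worked illustration of the loop above (values assumed purely
             for exposition): for an SLP group of size 4 whose load node
             reads elements in source order {a[2], a[0], a[3], a[1]} from an
             interleaving chain starting at a[0],
             vect_get_place_in_interleaving_chain returns {2, 0, 3, 1}.
             Since these differ from their positions {0, 1, 2, 3},
             THIS_LOAD_PERMUTED becomes true and {2, 0, 3, 1} is recorded as
             the node's SLP_TREE_LOAD_PERMUTATION.  */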
          if (loads_permuted)
            {
              if (!vect_supported_load_permutation_p (new_instance))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: unsupported load "
                                     "permutation %G", stmt_info->stmt);
                  vect_free_slp_instance (new_instance, false);
                  return false;
                }
            }

          /* If the loads and stores can be handled with load/store-lane
             instructions do not generate this SLP instance.  */
          if (is_a <loop_vec_info> (vinfo)
              && loads_permuted
              && dr && vect_store_lanes_supported (vectype, group_size, false))
            {
              slp_tree load_node;
              FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
                {
                  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
                    (SLP_TREE_SCALAR_STMTS (load_node)[0]);
                  /* Use SLP for strided accesses (or if we can't
                     load-lanes).  */
                  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
                      || ! vect_load_lanes_supported
                             (STMT_VINFO_VECTYPE (stmt_vinfo),
                              DR_GROUP_SIZE (stmt_vinfo), false))
                    break;
                }
              if (i == SLP_INSTANCE_LOADS (new_instance).length ())
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Built SLP cancelled: can use "
                                     "load/store-lanes\n");
                  vect_free_slp_instance (new_instance, false);
                  return false;
                }
            }

          vinfo->slp_instances.safe_push (new_instance);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Final SLP tree for instance:\n");
              vect_print_slp_tree (MSG_NOTE, vect_location, node);
            }

          return true;
        }
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
    }
  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_GROUP_FIRST_ELEMENT (stmt_info)
      && nunits.is_constant (&const_nunits))
    {
      /* We consider breaking the group only on VF boundaries from the existing
         start.  */
      for (i = 0; i < group_size; i++)
        if (!matches[i])
          break;

      if (i >= const_nunits && i < group_size)
        {
          /* Split into two groups at the first vector boundary before i.  */
          gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
          unsigned group1_size = i & ~(const_nunits - 1);

          stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
                                                           group1_size);
          bool res = vect_analyze_slp_instance (vinfo, stmt_info,
                                                max_tree_size);
          /* If the first non-match was in the middle of a vector,
             skip the rest of that vector.  */
          if (group1_size < i)
            {
              i = group1_size + const_nunits;
              if (i < group_size)
                rest = vect_split_slp_store_group (rest, const_nunits);
            }
          if (i < group_size)
            res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
          return res;
        }
      /* Even though the first vector did not all match, we might be able to SLP
         (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
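/* An illustration of the group splitting above (values assumed for
   exposition): with const_nunits == 4 (a power of two, as asserted) and
   the first mismatch at i == 6, group1_size = 6 & ~3 = 4, so the store
   group is split after the first full vector.  The recursion then
   analyzes the first group of four stores on its own and, because the
   mismatch at 6 lies in the middle of the next vector, skips ahead to
   i = 4 + 4 = 8 before considering the remainder.  */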
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

opt_result
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  stmt_vec_info first_element;

  DUMP_VECT_SCOPE ("vect_analyze_slp");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (! vect_analyze_slp_instance (vinfo, first_element,
                                             max_tree_size))
              {
                /* Dissolve reduction chain group.  */
                stmt_vec_info vinfo = first_element;
                while (vinfo)
                  {
                    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
                    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
                    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
                    vinfo = next;
                  }
                STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
              }
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
                                   max_tree_size);
    }

  return opt_result::success ();
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  DUMP_VECT_SCOPE ("vect_make_slp_decision");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form current_vector_size * X for some
         rational X, so they must have a common multiple.  */
      unrolling_factor
        = force_common_multiple (unrolling_factor,
                                 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "Decided to SLP %d instances. Unrolling factor ",
                       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}
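/* For instance (illustrative numbers only): two SLP instances with
   unrolling factors 2 and 3 force an overall
   LOOP_VINFO_SLP_UNROLLING_FACTOR of force_common_multiple (2, 3) == 6,
   so the loop is unrolled enough for both instances to emit whole
   vectors.  */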
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
                              hash_map<slp_tree, unsigned> &visited)
{
  stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo;
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  int j;

  /* We need to union stype over the incoming graph edges but we still
     want to limit recursion to stay O(N+E).  */
  bool only_edge = (++visited.get_or_insert (node) < node->refcnt);

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else if (!only_edge)
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
         original stmt for immediate uses.  */
      gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
        def = gimple_phi_result (stmt);
      else
        def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
          {
            use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
            if (!use_vinfo)
              continue;
            use_vinfo = vect_stmt_to_vectorize (use_vinfo);
            if (!STMT_SLP_TYPE (use_vinfo)
                && (STMT_VINFO_RELEVANT (use_vinfo)
                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
                && !(gimple_code (use_stmt) == GIMPLE_PHI
                     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
                                   "def in non-SLP stmt: %G", use_stmt);
                stype = hybrid;
              }
          }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
                         stmt_vinfo->stmt);
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  if (!only_edge)
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
      if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
        vect_detect_hybrid_slp_stmts (child, i, stype, visited);
}

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  hash_map<slp_tree, unsigned> visited;
  vect_detect_hybrid_slp_stmts (node, i, stype, visited);
}
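/* A sketch of the situation detected above (hypothetical stmts): if an
   SLP node computes _1 = a + b, and some relevant stmt outside every SLP
   instance, say _2 = _1 * c, uses _1, then _1's definition must be
   available both inside the SLP tree and for the loop-based
   vectorization of _2, so its stmt is marked hybrid.  */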
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  loop_vec_info loop_vinfo = (loop_vec_info) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
  if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
                         def_stmt_info->stmt);
      STMT_SLP_TYPE (def_stmt_info) = hybrid;
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
                          walk_stmt_info *wi)
{
  loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
  stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
          || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
            && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");

  /* First walk all pattern stmt in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              walk_stmt_info wi;
              memset (&wi, 0, sizeof (wi));
              wi.info = loop_vinfo;
              gimple_stmt_iterator gsi2
                = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
              walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
                                vect_detect_hybrid_slp_1, &wi);
              walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                               vect_detect_hybrid_slp_2,
                               vect_detect_hybrid_slp_1, &wi);
            }
        }
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
        vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
                                      i, pure_slp);
    }
}
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
                            gimple_stmt_iterator region_end_in,
                            vec_info_shared *shared)
  : vec_info (vec_info::bb, init_cost (NULL), shared),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      add_stmt (stmt);
    }

  bb->aux = this;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    /* Reset region marker.  */
    gimple_set_uid (gsi_stmt (si), -1);

  bb->aux = NULL;
}
/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
   given that child nodes have already been processed, and that
   their def types currently match their SLP node's def type.  */

static bool
vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
                                    slp_instance node_instance,
                                    stmt_vector_for_cost *cost_vec)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      tree vectype, nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype))
        /* We checked this when building the node.  */
        gcc_unreachable ();
      if (vectype == boolean_type_node)
        {
          vectype = vect_get_mask_type_for_stmt (stmt_info);
          if (!vectype)
            /* vect_get_mask_type_for_stmt has already explained the
               failure.  */
            return false;
        }

      stmt_vec_info sstmt_info;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
        STMT_VINFO_VECTYPE (sstmt_info) = vectype;
    }

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
        vf = loop_vinfo->vectorization_factor;
      else
        vf = 1;
      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = vect_get_num_vectors (vf * group_size, vectype);
    }

  bool dummy;
  return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
}
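/* A worked instance of the sizing rule above (values for illustration
   only): with a vectorization factor of 4, a group size of 2 and a V4SI
   vectype, vect_get_num_vectors (4 * 2, V4SI) yields 8 / 4 == 2 vector
   statements for the node.  */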
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
                                  slp_instance node_instance,
                                  scalar_stmts_to_slp_tree_map_t *visited,
                                  scalar_stmts_to_slp_tree_map_t *lvisited,
                                  stmt_vector_for_cost *cost_vec)
{
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* If we already analyzed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  slp_tree *leader;
  if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
      || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
    {
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
      return true;
    }

  /* The SLP graph is acyclic so not caching whether we failed or succeeded
     doesn't result in any issue since we throw away the lvisited set
     when we fail.  */
  lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
                                           visited, lvisited, cost_vec))
      return false;

  /* ??? We have to catch the case late where two first scalar stmts appear
     in multiple SLP children with different def type and fail.  Remember
     original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
     match it when that is vect_internal_def.  */
  auto_vec<vect_def_type, 4> dt;
  dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
        = SLP_TREE_DEF_TYPE (child);

  /* Check everything worked out.  */
  bool res = true;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
            != SLP_TREE_DEF_TYPE (child))
          res = false;
      }
    else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
      res = false;
  if (!res && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                     "not vectorized: same operand with different "
                     "def type in stmt.\n");

  if (res)
    res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
                                              cost_vec);

  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];

  return res;
}
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  DUMP_VECT_SCOPE ("vect_slp_analyze_operations");

  scalar_stmts_to_slp_tree_map_t *visited
    = new scalar_stmts_to_slp_tree_map_t ();
  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      scalar_stmts_to_slp_tree_map_t lvisited;
      stmt_vector_for_cost cost_vec;
      cost_vec.create (2);
      if (!vect_slp_analyze_node_operations (vinfo,
                                             SLP_INSTANCE_TREE (instance),
                                             instance, visited, &lvisited,
                                             &cost_vec))
        {
          slp_tree node = SLP_INSTANCE_TREE (instance);
          stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "removing SLP instance operations starting from: %G",
                             stmt_info->stmt);
          vect_free_slp_instance (instance, false);
          vinfo->slp_instances.ordered_remove (i);
          cost_vec.release ();
        }
      else
        {
          for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
               x != lvisited.end(); ++x)
            visited->put ((*x).first.copy (), (*x).second);
          i++;

          add_stmt_costs (vinfo->target_cost_data, &cost_vec);
          cost_vec.release ();
        }
    }
  delete visited;

  return !vinfo->slp_instances.is_empty ();
}
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static void
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_heap> *life,
                         stmt_vector_for_cost *cost_vec,
                         hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      vec_info *vinfo = stmt_info->vinfo;
      ssa_op_iter op_iter;
      def_operand_p def_p;

      if ((*life)[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt))
              {
                stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
                if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
                  {
                    (*life)[i] = true;
                    BREAK_FROM_IMM_USE_STMT (use_iter);
                  }
              }
        }
      if ((*life)[i])
        continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
        continue;
      gimple_set_visited (stmt, true);

      vect_cost_for_stmt kind;
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            kind = scalar_load;
          else
            kind = scalar_store;
        }
      else
        kind = scalar_stmt;
      record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
          subtree_life.safe_splice (*life);
          vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
                                   visited);
          subtree_life.truncate (0);
        }
    }
}

static void
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_heap> *life,
                         stmt_vector_for_cost *cost_vec)
{
  hash_set<slp_tree> visited;
  vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
}
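/* To illustrate LIFE (hypothetical stmts): for a node with scalar stmts
   {t1 = x + y; t2 = z + w;} where t1 is also used by a stmt that will
   not be vectorized, (*life)[0] is set above and t1's addition is not
   counted as scalar cost saved by vectorizing.  Only t2's addition
   contributes, so the scalar cost comes out smaller and vectorization
   looks correspondingly less attractive.  */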
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  stmt_vector_for_cost scalar_costs;
  scalar_costs.create (0);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
                               SLP_INSTANCE_TREE (instance),
                               &life, &scalar_costs);
    }
  void *target_cost_data = init_cost (NULL);
  add_stmt_costs (target_cost_data, &scalar_costs);
  scalar_costs.release ();
  unsigned dummy;
  finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
  destroy_cost_data (target_cost_data);

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
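/* A numeric sketch of the test above (made-up costs): with
   vec_inside_cost == 6, vec_prologue_cost == 1, vec_epilogue_cost == 1
   and scalar_cost == 8, we have 1 + 1 + 6 == 8 which is not greater
   than scalar_cost, so the equal-cost case is resolved in favour of
   vectorizing.  */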
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
                       gimple_stmt_iterator region_end,
                       vec<data_reference_p> datarefs, int n_stmts,
                       bool &fatal, vec_info_shared *shared)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: too many instructions in "
                         "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
  if (!bb_vinfo)
    return NULL;

  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
  bb_vinfo->shared->save_datarefs ();

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: no grouped stores in "
                         "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Failed to SLP the basic block.\n");
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: failed to find SLP opportunities "
                           "in basic block.\n");
        }
      delete bb_vinfo;
      return NULL;
    }

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
          || ! vect_slp_analyze_instance_dependence (instance))
        {
          slp_tree node = SLP_INSTANCE_TREE (instance);
          stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "removing SLP instance operations starting from: %G",
                             stmt_info->stmt);
          vect_free_slp_instance (instance, false);
          BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
          continue;
        }

      /* Mark all the statements that we want to vectorize as pure SLP and
         relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");
  return bb_vinfo;
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;
  auto_vector_sizes vector_sizes;

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
  unsigned int next_size = 0;

  gsi = gsi_start_bb (bb);

  poly_uint64 autodetected_vector_size = 0;
  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = stmt;

          if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      vec_info_shared shared;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal, &shared);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          bb_vinfo->shared->check_datarefs ();
          vect_schedule_slp (bb_vinfo);

          unsigned HOST_WIDE_INT bytes;
          if (dump_enabled_p ())
            {
              if (current_vector_size.is_constant (&bytes))
                dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                                 "basic block part vectorized using %wu byte "
                                 "vectors\n", bytes);
              else
                dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                                 "basic block part vectorized using variable "
                                 "length vectors\n");
            }

          vectorized = true;
        }
      delete bb_vinfo;

      any_vectorized |= vectorized;

      if (next_size == 0)
        autodetected_vector_size = current_vector_size;

      if (next_size < vector_sizes.length ()
          && known_eq (vector_sizes[next_size], autodetected_vector_size))
        next_size += 1;

      if (vectorized
          || next_size == vector_sizes.length ()
          || known_eq (current_vector_size, 0U)
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          next_size = 0;
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = vector_sizes[next_size++];
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "***** Re-trying analysis with "
                               "vector size ");
              dump_dec (MSG_NOTE, current_vector_size);
              dump_printf (MSG_NOTE, "\n");
            }

          /* Start over with the same region.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}
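/* The retry loop above, in brief (target details are illustrative): if
   the target advertises several vector sizes, for example 32-byte and
   16-byte vectors on an AVX-capable x86_64, a region that fails
   analysis with the autodetected size is re-analyzed with each
   remaining size before the vectorizer finally gives up on it, unless
   the failure was flagged as fatal for all sizes.  */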
/* Return true if the vector type of the boolean constant that is the OPNUM
   operand in statement STMT_VINFO is a boolean vector.  */

static bool
vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
{
  enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
  tree op, vectype;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
      if (opnum)
        op = gimple_assign_rhs1 (stmt);
      else
        op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        op = cond;
      else if (opnum)
        op = TREE_OPERAND (cond, 1);
      else
        op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */
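/* A small worked example of steps (1)-(5), with the mode choice assumed
   purely for illustration: to build V4SI vectors from NELTS == 2
   variable elements {a, b}, one candidate is IM == DImode with
   VM == V2DImode and NVECTORS == 1.  Step (2) packs {a, b} into a V2SI
   and view-converts it to a single DImode value; step (3) broadcasts
   that value into a V2DI; with only one piece no interleaving is needed
   in step (4); and step (5) view-converts the V2DI back to V4SI,
   yielding {a, b, a, b}.  */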
static void
duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
                          unsigned int nresults, vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
                                       &nvectors, &new_vector_type,
                                       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
             ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
        partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
                        TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
         correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

        out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
        out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
        {
          if ((i & 1) != 0
              && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
                             2 * in_repeat))
            {
              pieces[out_start + i] = pieces[out_start + i - 1];
              continue;
            }

          tree output = make_ssa_name (new_vector_type);
          tree input1 = pieces[in_start + (i / 2)];
          tree input2 = pieces[in_start + (i / 2) + hi_start];
          gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
                                               input1, input2,
                                               permutes[i & 1]);
          gimple_seq_add_stmt (seq, stmt);
          pieces[out_start + i] = output;
        }
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
                                        pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  stmt_vec_info stmt_vinfo = stmts[0];
  gimple *stmt = stmt_vinfo->stmt;
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt_vinfo, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
        {
          stmt = stmt_vinfo->stmt;
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                case COND_EXPR:
                  {
                    tree cond = gimple_assign_rhs1 (stmt);
                    if (TREE_CODE (cond) == SSA_NAME)
                      op = gimple_op (stmt, op_num + 1);
                    else if (op_num == 0 || op_num == 1)
                      op = TREE_OPERAND (cond, op_num);
                    else
                      {
                        if (op_num == 2)
                          op = gimple_assign_rhs2 (stmt);
                        else
                          op = gimple_assign_rhs3 (stmt);
                      }
                  }
                  break;

                case CALL_EXPR:
                  op = gimple_call_arg (stmt, op_num);
                  break;

                case LSHIFT_EXPR:
                case RSHIFT_EXPR:
                case LROTATE_EXPR:
                case RROTATE_EXPR:
                  op = gimple_op (stmt, op_num + 1);
                  /* Unlike the other binary operators, shifts/rotates have
                     the shift count being int, instead of the same type as
                     the lhs, so make sure the scalar is the right type if
                     we are dealing with vectors of
                     long long/long/short/char.  */
                  if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                    op = fold_convert (TREE_TYPE (vector_type), op);
                  break;

                default:
                  op = gimple_op (stmt, op_num + 1);
                  break;
                }
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p
                  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
                  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
                vec_cst = gimple_build_vector (&ctor_seq, &elts);
              else
                {
                  if (vec_oprnds->is_empty ())
                    duplicate_and_interleave (&ctor_seq, vector_type, elts,
                                              number_of_vectors,
                                              permute_results);
                  vec_cst = permute_results[number_of_vectors - j - 1];
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  stmt_vec_info last_stmt_info
                    = vect_find_last_scalar_stmt_in_slp (slp_node);
                  gsi = gsi_for_stmt (last_stmt_info->stmt);
                  init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
                                           &gsi);
                }
              else
                init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
                                         NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
              elts.new_vector (vector_type, nunits, 1);
              elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  stmt_vec_info vec_def_stmt_info;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
    {
      gcc_assert (vec_def_stmt_info);
      if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
        vec_oprnd = gimple_phi_result (vec_def_phi);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors (), and not advance CHILD_INDEX in order
         to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
              stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
              tree first_def_op;

              if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def_info->stmt);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd,
                                          gimple_get_lhs (related->stmt), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, poly_uint64 vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  vec_info *vinfo = stmt_info->vinfo;
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  unsigned int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  vec_perm_builder mask;
  unsigned int nelts_to_build;
  unsigned int nvectors_per_build;
  bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
                      && multiple_p (nunits, group_size));
  if (repeating_p)
    {
      /* A single vector contains a whole number of copies of the node, so:
         (a) all permutes can use the same mask; and
         (b) the permutes only need a single vector input.  */
      mask.new_vector (nunits, group_size, 3);
      nelts_to_build = mask.encoded_nelts ();
      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    }
  else
    {
      /* We need to construct a separate mask for each vector statement.  */
      unsigned HOST_WIDE_INT const_nunits, const_vf;
      if (!nunits.is_constant (&const_nunits)
          || !vf.is_constant (&const_vf))
        return false;
      mask.new_vector (const_nunits, const_nunits, 1);
      nelts_to_build = const_vf * group_size;
      nvectors_per_build = 1;
    }

  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;

  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
                        + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
      if (repeating_p)
        {
          first_vec_index = 0;
          mask_element = i;
        }
      else
        {
          /* Enforced before the loop when !repeating_p.  */
          unsigned int const_nunits = nunits.to_constant ();
          vec_index = i / const_nunits;
          mask_element = i % const_nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += const_nunits;
            }
          else
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "permutation requires at "
                                 "least three vectors %G",
                                 stmt_info->stmt);
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element < 2 * const_nunits);
        }

      if (mask_element != index)
        noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
        {
          indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
          if (!can_vec_perm_const_p (mode, indices))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported vect permute { ");
                  for (i = 0; i < count; ++i)
                    {
                      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
                    }
                  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                }
              gcc_assert (analyze_only);
              return false;
            }

          ++*n_perms;
        }

      if (index == count)
        {
          if (!analyze_only)
            {
              tree mask_vec = NULL_TREE;

              if (! noop_p)
                mask_vec = vect_gen_perm_mask_checked (vectype, indices);

              if (second_vec_index == -1)
                second_vec_index = first_vec_index;

              for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
                {
                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index + ri];
                  tree second_vec = dr_chain[second_vec_index + ri];
                  stmt_vec_info perm_stmt_info;
                  if (! noop_p)
                    {
                      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      gassign *perm_stmt
                        = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                               first_vec, second_vec,
                                               mask_vec);
                      perm_stmt_info
                        = vect_finish_stmt_generation (stmt_info, perm_stmt,
                                                       gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt_info = vinfo->lookup_def (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
                    = perm_stmt_info;
                }
            }

          index = 0;
          first_vec_index = -1;
          second_vec_index = -1;
          noop_p = true;
        }
    }

  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if (SLP_TREE_VEC_STMTS (node).exists ())
    return;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts across instances.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "------>vectorizing SLP node starting from: %G",
                     stmt_info->stmt);

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
        {
          gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
          if (gimple_assign_rhs_code (ostmt) != code0)
            {
              mask.quick_push (1);
              ocode = gimple_assign_rhs_code (ostmt);
            }
          else
            mask.quick_push (0);
        }
      if (ocode != ERROR_MARK)
        {
          vec<stmt_vec_info> v0;
          vec<stmt_vec_info> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt_info, &si, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt_info, &si, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ???  Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]->stmt),
                                           gimple_assign_lhs (v1[j]->stmt),
                                           tmask);
              SLP_TREE_VEC_STMTS (node).quick_push
                (vect_finish_stmt_generation (stmt_info, vstmt, &si));
            }
          v0.release ();
          v1.release ();
          return;
        }
    }
  vect_transform_stmt (stmt_info, &si, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }
}
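/* A sketch of the SLP_TREE_TWO_OPERATORS path above (illustrative stmts):
   for a group {t0 = a0 + b0; t1 = a1 - b1; t2 = a2 + b2; t3 = a3 - b3}
   the node is first vectorized entirely with PLUS_EXPR, then entirely
   with MINUS_EXPR, and a VEC_PERM_EXPR with mask {0, 5, 2, 7} selects
   the even lanes from the addition result and the odd lanes from the
   subtraction result, a pattern targets can often match to an addsub
   instruction.  */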
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
        continue;
      if (is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (node, visited);
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                  instance, bst_map);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (store_info))
            break;

          store_info = vect_orig_stmt (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);
        }
    }
}