1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
/* NOTE(review): this chunk is a partial extraction -- the embedded original
   line numbers jump (54, 59, 62, ...), so local declarations, braces and
   the final free of NODE itself are not visible here.  Code left untouched.  */
54 vect_free_slp_tree (slp_tree node
, bool final_p
)
/* Shared subtree: drop one reference and stop while other users remain.  */
59 if (--node
->refcnt
!= 0)
/* Recurse into the children first.  */
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
63 vect_free_slp_tree (child
, final_p
);
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
71 stmt_vec_info stmt_info
;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
/* Release all vectors owned by NODE.  */
79 SLP_TREE_CHILDREN (node
).release ();
80 SLP_TREE_SCALAR_STMTS (node
).release ();
81 SLP_TREE_SCALAR_OPS (node
).release ();
82 SLP_TREE_VEC_STMTS (node
).release ();
83 SLP_TREE_LOAD_PERMUTATION (node
).release ();
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
93 vect_free_slp_instance (slp_instance instance
, bool final_p
)
/* Free the tree first, then the instance's own load vector.
   NOTE(review): the free of INSTANCE itself is not visible in this
   partial extraction.  */
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
96 SLP_INSTANCE_LOADS (instance
).release ();
101 /* Create an SLP node for SCALAR_STMTS.  The node takes ownership of the
    SCALAR_STMTS vector and bumps STMT_VINFO_NUM_SLP_USES on each stmt.
    NOTE(review): partial extraction -- the nops initialisation for the
    COND_EXPR case and the return statement are missing from this view.  */
104 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
/* Derive the number of operand slots from the representative stmt.  */
107 stmt_vec_info stmt_info
= scalar_stmts
[0];
110 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
118 else if (is_a
<gphi
*> (stmt_info
->stmt
))
/* Allocate and initialise the node itself.  */
123 node
= XNEW (struct _slp_tree
);
124 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
125 SLP_TREE_SCALAR_OPS (node
) = vNULL
;
126 SLP_TREE_VEC_STMTS (node
).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
128 SLP_TREE_CHILDREN (node
).create (nops
);
129 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
130 SLP_TREE_TWO_OPERATORS (node
) = false;
131 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
133 node
->max_nunits
= 1;
/* Record one more SLP use on every covered scalar stmt.  */
136 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
137 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
142 /* Create an SLP node for OPS.  Overload used for external defs: the node
    holds scalar operands rather than scalar stmts, hence
    SLP_TREE_DEF_TYPE is vect_external_def.  NOTE(review): partial
    extraction -- the return statement is missing from this view.  */
145 vect_create_new_slp_node (vec
<tree
> ops
)
149 node
= XNEW (struct _slp_tree
);
150 SLP_TREE_SCALAR_STMTS (node
) = vNULL
;
151 SLP_TREE_SCALAR_OPS (node
) = ops
;
152 SLP_TREE_VEC_STMTS (node
).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
154 SLP_TREE_CHILDREN (node
) = vNULL
;
155 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
156 SLP_TREE_TWO_OPERATORS (node
) = false;
157 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
159 node
->max_nunits
= 1;
165 /* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
/* NOTE(review): partial extraction -- members referenced later in this
   file (ops, first_op_type, any_pattern) and the closing typedef name
   are not visible here.  */
168 typedef struct _slp_oprnd_info
170 /* Def-stmts for the operands. */
171 vec
<stmt_vec_info
> def_stmts
;
174 /* Information about the first statement, its vector def-type, type, the
175 operand itself in case it's constant, and an indication if it's a pattern
178 enum vect_def_type first_dt
;
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
/* Returns a freshly created vector of NOPS slp_oprnd_info records; the
   caller releases it with vect_free_oprnd_info.  NOTE(review): partial
   extraction -- the return statement is missing from this view.  */
185 static vec
<slp_oprnd_info
>
186 vect_create_oprnd_info (int nops
, int group_size
)
189 slp_oprnd_info oprnd_info
;
190 vec
<slp_oprnd_info
> oprnds_info
;
192 oprnds_info
.create (nops
);
193 for (i
= 0; i
< nops
; i
++)
/* Each operand slot starts out uninitialized with room for
   GROUP_SIZE def-stmts/ops.  */
195 oprnd_info
= XNEW (struct _slp_oprnd_info
);
196 oprnd_info
->def_stmts
.create (group_size
);
197 oprnd_info
->ops
.create (group_size
);
198 oprnd_info
->first_dt
= vect_uninitialized_def
;
199 oprnd_info
->first_op_type
= NULL_TREE
;
200 oprnd_info
->any_pattern
= false;
201 oprnds_info
.quick_push (oprnd_info
);
208 /* Free operands info.  Releases each element's vectors, the element
    itself, and finally the container.  */
211 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
214 slp_oprnd_info oprnd_info
;
216 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
218 oprnd_info
->def_stmts
.release ();
219 oprnd_info
->ops
.release ();
220 XDELETE (oprnd_info
);
223 oprnds_info
.release ();
227 /* Return true if STMTS contains a pattern statement.
    NOTE(review): partial extraction -- the return statements are missing
    from this view.  */
230 vect_contains_pattern_stmt_p (vec
<stmt_vec_info
> stmts
)
232 stmt_vec_info stmt_info
;
234 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
235 if (is_pattern_stmt_p (stmt_info
))
240 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
241 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
/* NOTE(review): partial extraction -- the result accumulation before the
   DR_GROUP_GAP addition and the loop braces are missing from this view.  */
245 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
246 stmt_vec_info first_stmt_info
)
248 stmt_vec_info next_stmt_info
= first_stmt_info
;
/* STMT_INFO must belong to the chain headed by FIRST_STMT_INFO.  */
251 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
/* Walk the chain, accumulating positions (including gaps) until the
   stmt is found.  */
256 if (next_stmt_info
== stmt_info
)
258 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
260 result
+= DR_GROUP_GAP (next_stmt_info
);
262 while (next_stmt_info
);
267 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
268 using the method implemented by duplicate_and_interleave. Return true
269 if so, returning the number of intermediate vectors in *NVECTORS_OUT
270 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
/* NOTE(review): partial extraction -- the PERMUTES out-parameter in the
   signature, the enclosing while-loop, several guards and the return
   statements are missing from this view.  Code left untouched.  */
274 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
275 tree elt_type
, unsigned int *nvectors_out
,
276 tree
*vector_type_out
,
279 tree base_vector_type
= get_vectype_for_scalar_type (vinfo
, elt_type
, count
);
280 if (!base_vector_type
|| !VECTOR_MODE_P (TYPE_MODE (base_vector_type
)))
283 machine_mode base_vector_mode
= TYPE_MODE (base_vector_type
);
284 poly_int64 elt_bytes
= count
* GET_MODE_UNIT_SIZE (base_vector_mode
);
285 unsigned int nvectors
= 1;
288 scalar_int_mode int_mode
;
289 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
/* Only proceed if an integer mode of exactly ELT_BITS exists.  */
290 if (int_mode_for_size (elt_bits
, 1).exists (&int_mode
))
292 /* Get the natural vector type for this SLP group size. */
293 tree int_type
= build_nonstandard_integer_type
294 (GET_MODE_BITSIZE (int_mode
), 1);
296 = get_vectype_for_scalar_type (vinfo
, int_type
, count
);
298 && VECTOR_MODE_P (TYPE_MODE (vector_type
))
299 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
300 GET_MODE_SIZE (base_vector_mode
)))
302 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
303 together into elements of type INT_TYPE and using the result
304 to build NVECTORS vectors. */
305 poly_uint64 nelts
= GET_MODE_NUNITS (TYPE_MODE (vector_type
));
306 vec_perm_builder
sel1 (nelts
, 2, 3);
307 vec_perm_builder
sel2 (nelts
, 2, 3);
308 poly_int64 half_nelts
= exact_div (nelts
, 2);
309 for (unsigned int i
= 0; i
< 3; ++i
)
312 sel1
.quick_push (i
+ nelts
);
313 sel2
.quick_push (half_nelts
+ i
);
314 sel2
.quick_push (half_nelts
+ i
+ nelts
);
/* Both interleaving permutes must be supported by the target.  */
316 vec_perm_indices
indices1 (sel1
, 2, nelts
);
317 vec_perm_indices
indices2 (sel2
, 2, nelts
);
318 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
319 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
322 *nvectors_out
= nvectors
;
324 *vector_type_out
= vector_type
;
327 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
329 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
/* Otherwise halve the element size and retry with twice as many
   vectors -- presumably the loop continuation; verify against the
   unabridged source.  */
336 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
342 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
343 they are of a valid type and that they match the defs of the first stmt of
344 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
345 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
346 indicates swap is required for cond_expr stmts. Specifically, *SWAP
347 is 1 if STMT is cond and operands of comparison need to be swapped;
348 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
349 If there is any operand swap in this function, *SWAP is set to non-zero
351 If there was a fatal error return -1; if the error could be corrected by
352 swapping operands of father node of this one, return 1; if everything is
/* NOTE(review): this function is a partial extraction -- the return type,
   opening brace, the switch keyword for the case labels below, many
   braces/returns and several dump arguments are missing from this view.
   Only comments have been added; code is untouched.  */
355 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
356 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
357 vec
<slp_oprnd_info
> *oprnds_info
)
359 stmt_vec_info stmt_info
= stmts
[stmt_num
];
361 unsigned int i
, number_of_oprnds
;
362 enum vect_def_type dt
= vect_uninitialized_def
;
363 slp_oprnd_info oprnd_info
;
364 int first_op_idx
= 1;
365 unsigned int commutative_op
= -1U;
366 bool first_op_cond
= false;
367 bool first
= stmt_num
== 0;
/* Determine the operand count and, for calls/assigns, which operand
   (if any) is commutative.  */
369 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
371 number_of_oprnds
= gimple_call_num_args (stmt
);
373 if (gimple_call_internal_p (stmt
))
375 internal_fn ifn
= gimple_call_internal_fn (stmt
);
376 commutative_op
= first_commutative_argument (ifn
);
378 /* Masked load, only look at mask. */
379 if (ifn
== IFN_MASK_LOAD
)
381 number_of_oprnds
= 1;
382 /* Mask operand index. */
387 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
389 enum tree_code code
= gimple_assign_rhs_code (stmt
);
390 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
391 /* Swap can only be done for cond_expr if asked to, otherwise we
392 could result in different comparison code to the first stmt. */
393 if (code
== COND_EXPR
394 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
396 first_op_cond
= true;
400 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
405 bool swapped
= (*swap
!= 0);
406 gcc_assert (!swapped
|| first_op_cond
);
/* Collect and validate each operand in turn.  */
407 for (i
= 0; i
< number_of_oprnds
; i
++)
412 /* Map indicating how operands of cond_expr should be swapped. */
413 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
414 int *map
= maps
[*swap
];
417 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
418 first_op_idx
), map
[i
]);
420 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
423 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
424 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
425 oprnd
= TREE_OPERAND (oprnd
, 0);
427 oprnd_info
= (*oprnds_info
)[i
];
429 stmt_vec_info def_stmt_info
;
430 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
434 "Build SLP failed: can't analyze def for %T\n",
440 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
441 oprnd_info
->any_pattern
= true;
445 /* For the swapping logic below force vect_reduction_def
446 for the reduction op in a SLP reduction group. */
447 if (!STMT_VINFO_DATA_REF (stmt_info
)
448 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
449 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
451 dt
= vect_reduction_def
;
/* First stmt of the group: record what later stmts must match.  */
452 oprnd_info
->first_dt
= dt
;
453 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
457 /* Not first stmt of the group, check that the def-stmt/s match
458 the def-stmt/s of the first stmt. Allow different definition
459 types for reduction chains: the first stmt must be a
460 vect_reduction_def (a phi node), and the rest
461 end in the reduction chain. */
462 tree type
= TREE_TYPE (oprnd
);
463 if ((oprnd_info
->first_dt
!= dt
464 && !(oprnd_info
->first_dt
== vect_reduction_def
465 && !STMT_VINFO_DATA_REF (stmt_info
)
466 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
468 && !STMT_VINFO_DATA_REF (def_stmt_info
)
469 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
470 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
471 && !((oprnd_info
->first_dt
== vect_external_def
472 || oprnd_info
->first_dt
== vect_constant_def
)
473 && (dt
== vect_external_def
474 || dt
== vect_constant_def
)))
475 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
476 || (!STMT_VINFO_DATA_REF (stmt_info
)
477 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
479 || STMT_VINFO_DATA_REF (def_stmt_info
)
480 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
481 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
482 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
484 /* Try swapping operands if we got a mismatch. */
485 if (i
== commutative_op
&& !swapped
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "trying swapped operands\n");
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
496 "Build SLP failed: different types\n");
/* Variable-length vectors: constants/externals must be buildable
   via duplicate_and_interleave.  */
500 if ((dt
== vect_constant_def
501 || dt
== vect_external_def
)
502 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
503 && (TREE_CODE (type
) == BOOLEAN_TYPE
504 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
509 "Build SLP failed: invalid type of def "
510 "for variable-length SLP %T\n", oprnd
);
515 /* Check the types of the definitions. */
518 case vect_external_def
:
519 /* Make sure to demote the overall operand to external. */
520 oprnd_info
->first_dt
= vect_external_def
;
522 case vect_constant_def
:
523 oprnd_info
->def_stmts
.quick_push (NULL
);
524 oprnd_info
->ops
.quick_push (oprnd
);
527 case vect_internal_def
:
528 case vect_reduction_def
:
529 if (oprnd_info
->first_dt
== vect_reduction_def
530 && !STMT_VINFO_DATA_REF (stmt_info
)
531 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
532 && !STMT_VINFO_DATA_REF (def_stmt_info
)
533 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
534 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
536 /* For a SLP reduction chain we want to duplicate the
537 reduction to each of the chain members. That gets
538 us a sane SLP graph (still the stmts are not 100%
539 correct wrt the initial values). */
541 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
542 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
546 case vect_induction_def
:
547 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
548 oprnd_info
->ops
.quick_push (oprnd
);
552 /* FORNOW: Not supported. */
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
555 "Build SLP failed: illegal type of def %T\n",
567 /* If there are already uses of this stmt in a SLP instance then
568 we've committed to the operand order and can't swap it. */
569 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
573 "Build SLP failed: cannot swap operands of "
574 "shared stmt %G", stmt_info
->stmt
);
578 /* To get rid of this swapping we have to move the stmt code
579 to the SLP tree as well (and gather it here per stmt). */
580 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
581 tree cond
= gimple_assign_rhs1 (stmt
);
582 enum tree_code code
= TREE_CODE (cond
);
/* Swap the two comparison operands and adjust the comparison code.  */
587 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
588 &TREE_OPERAND (cond
, 1));
589 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
/* Or swap the then/else arms and invert the comparison, keeping
   STMT_VINFO_REDUC_IDX pointing at the same logical operand.  */
594 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
595 gimple_assign_rhs3_ptr (stmt
));
596 if (STMT_VINFO_REDUC_IDX (stmt_info
) == 1)
597 STMT_VINFO_REDUC_IDX (stmt_info
) = 2;
598 else if (STMT_VINFO_REDUC_IDX (stmt_info
) == 2)
599 STMT_VINFO_REDUC_IDX (stmt_info
) = 1;
600 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
601 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
602 gcc_assert (code
!= ERROR_MARK
);
603 TREE_SET_CODE (cond
, code
);
608 /* Commutative ops need not reflect swapping, ops are in
611 if (dump_enabled_p ())
612 dump_printf_loc (MSG_NOTE
, vect_location
,
613 "swapped operands to match def types in %G",
621 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
622 Return true if we can, meaning that this choice doesn't conflict with
623 existing SLP nodes that use STMT_INFO. */
/* NOTE(review): partial extraction -- the return statements and some
   braces are missing from this view.  Code left untouched.  */
626 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
/* An equivalent type is already assigned: nothing to do.  */
628 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
629 if (old_vectype
&& useless_type_conversion_p (vectype
, old_vectype
))
632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
633 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
635 /* We maintain the invariant that if any statement in the group is
636 used, all other members of the group have the same vector type. */
637 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
638 stmt_vec_info member_info
= first_info
;
639 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
640 if (STMT_VINFO_NUM_SLP_USES (member_info
) > 0
641 || is_pattern_stmt_p (member_info
))
/* No member is committed yet: update the whole group.  */
646 for (member_info
= first_info
; member_info
;
647 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
648 STMT_VINFO_VECTYPE (member_info
) = vectype
;
652 else if (STMT_VINFO_NUM_SLP_USES (stmt_info
) == 0
653 && !is_pattern_stmt_p (stmt_info
))
655 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
/* Conflict with a type another SLP node committed to: report it.  */
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
662 "Build SLP failed: incompatible vector"
663 " types for: %G", stmt_info
->stmt
);
664 dump_printf_loc (MSG_NOTE
, vect_location
,
665 " old vector type: %T\n", old_vectype
);
666 dump_printf_loc (MSG_NOTE
, vect_location
,
667 " new vector type: %T\n", vectype
);
672 /* Try to infer and assign a vector type to all the statements in STMTS.
673 Used only for BB vectorization.
    Infers the type from STMTS[0] and then applies it to each stmt via
    vect_update_shared_vectype.  NOTE(review): partial extraction -- the
    return statements are missing from this view.  */
676 vect_update_all_shared_vectypes (vec
<stmt_vec_info
> stmts
)
678 tree vectype
, nunits_vectype
;
679 if (!vect_get_vector_types_for_stmt (stmts
[0], &vectype
,
680 &nunits_vectype
, stmts
.length ()))
683 stmt_vec_info stmt_info
;
685 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
686 if (!vect_update_shared_vectype (stmt_info
, vectype
))
692 /* Return true if call statements CALL1 and CALL2 are similar enough
693 to be combined into the same SLP group.
    Requires matching argument count, combined function, and -- for
    internal calls -- compatible lhs/argument types; for other calls,
    identical callee and fntype.  NOTE(review): partial extraction --
    the return statements are missing from this view.  */
696 compatible_calls_p (gcall
*call1
, gcall
*call2
)
698 unsigned int nargs
= gimple_call_num_args (call1
);
699 if (nargs
!= gimple_call_num_args (call2
))
702 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
705 if (gimple_call_internal_p (call1
))
707 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
708 TREE_TYPE (gimple_call_lhs (call2
))))
710 for (unsigned int i
= 0; i
< nargs
; ++i
)
711 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
712 TREE_TYPE (gimple_call_arg (call2
, i
))))
717 if (!operand_equal_p (gimple_call_fn (call1
),
718 gimple_call_fn (call2
), 0))
721 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
727 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
728 caller's attempt to find the vector type in STMT_INFO with the narrowest
729 element type. Return true if VECTYPE is nonnull and if it is valid
730 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
731 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
732 vect_build_slp_tree. */
735 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
736 tree vectype
, poly_uint64
*max_nunits
)
740 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: unsupported data-type in %G\n",
744 /* Fatal mismatch. */
748 /* If populating the vector type requires unrolling then fail
749 before adjusting *max_nunits for basic-block vectorization. */
750 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
751 unsigned HOST_WIDE_INT const_nunits
;
752 if (STMT_VINFO_BB_VINFO (stmt_info
)
753 && (!nunits
.is_constant (&const_nunits
)
754 || const_nunits
> group_size
))
756 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
758 "Build SLP failed: unrolling required "
759 "in basic block SLP\n");
760 /* Fatal mismatch. */
764 /* In case of multiple types we need to detect the smallest type. */
765 vect_update_max_nunits (max_nunits
, vectype
);
769 /* STMTS is a group of GROUP_SIZE SLP statements in which some
770 statements do the same operation as the first statement and in which
771 the others do ALT_STMT_CODE. Return true if we can take one vector
772 of the first operation and one vector of the second and permute them
773 to get the required result. VECTYPE is the type of the vector that
774 would be permuted. */
/* NOTE(review): partial extraction -- the push of ELT for the
   non-alternate lanes and an early return are missing from this view.  */
777 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
778 unsigned int group_size
, tree vectype
,
779 tree_code alt_stmt_code
)
/* The permute can only be checked for a constant element count.  */
781 unsigned HOST_WIDE_INT count
;
782 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
/* Build the lane selector: lanes performing ALT_STMT_CODE pick from
   the second input vector.  */
785 vec_perm_builder
sel (count
, count
, 1);
786 for (unsigned int i
= 0; i
< count
; ++i
)
788 unsigned int elt
= i
;
789 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
790 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
792 sel
.quick_push (elt
);
794 vec_perm_indices
indices (sel
, 2, count
);
795 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
798 /* Verify if the scalar stmts STMTS are isomorphic, require data
799 permutation or are of unsupported types of operation. Return
800 true if they are, otherwise return false and indicate in *MATCHES
801 which stmts are not isomorphic to the first one. If MATCHES[0]
802 is false then this indicates the comparison could not be
803 carried out or the stmts will never be vectorized by SLP.
805 Note COND_EXPR is possibly isomorphic to another one after swapping its
806 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
807 the first stmt by swapping the two operands of comparison; set SWAP[i]
808 to 2 if stmt I is isomorphic to the first stmt by inverting the code
809 of comparison. Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
810 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
/* NOTE(review): this function is a partial extraction -- the return
   type, TWO_OPERATORS parameter, braces, MATCHES[] assignments and
   return statements are missing from this view.  Code left untouched.  */
813 vect_build_slp_tree_1 (unsigned char *swap
,
814 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
815 poly_uint64
*max_nunits
, bool *matches
,
819 stmt_vec_info first_stmt_info
= stmts
[0];
820 enum tree_code first_stmt_code
= ERROR_MARK
;
821 enum tree_code alt_stmt_code
= ERROR_MARK
;
822 enum tree_code rhs_code
= ERROR_MARK
;
823 enum tree_code first_cond_code
= ERROR_MARK
;
825 bool need_same_oprnds
= false;
826 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
829 machine_mode optab_op2_mode
;
830 machine_mode vec_mode
;
831 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
834 /* For every stmt in NODE find its def stmt/s. */
835 stmt_vec_info stmt_info
;
836 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
838 vec_info
*vinfo
= stmt_info
->vinfo
;
839 gimple
*stmt
= stmt_info
->stmt
;
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
846 /* Fail to vectorize statements marked as unvectorizable. */
847 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
851 "Build SLP failed: unvectorizable statement %G",
853 /* Fatal mismatch. */
/* Every stmt must have an lhs (assignment or call result).  */
858 lhs
= gimple_get_lhs (stmt
);
859 if (lhs
== NULL_TREE
)
861 if (dump_enabled_p ())
862 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
863 "Build SLP failed: not GIMPLE_ASSIGN nor "
864 "GIMPLE_CALL %G", stmt
);
865 /* Fatal mismatch. */
871 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
872 &nunits_vectype
, group_size
)
874 && !vect_record_max_nunits (stmt_info
, group_size
,
875 nunits_vectype
, max_nunits
)))
877 /* Fatal mismatch. */
882 gcc_assert (vectype
);
884 if (is_a
<bb_vec_info
> (vinfo
)
885 && !vect_update_shared_vectype (stmt_info
, vectype
))
/* Calls: only plain nothrow non-tail internal-fn style calls are
   supported.  */
888 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
891 rhs_code
= CALL_EXPR
;
893 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
895 else if ((gimple_call_internal_p (call_stmt
)
896 && (!vectorizable_internal_fn_p
897 (gimple_call_internal_fn (call_stmt
))))
898 || gimple_call_tail_p (call_stmt
)
899 || gimple_call_noreturn_p (call_stmt
)
900 || !gimple_call_nothrow_p (call_stmt
)
901 || gimple_call_chain (call_stmt
))
903 if (dump_enabled_p ())
904 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
905 "Build SLP failed: unsupported call type %G",
907 /* Fatal mismatch. */
914 rhs_code
= gimple_assign_rhs_code (stmt
);
915 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
918 /* Check the operation. */
/* First stmt: remember its code and any required-identical operand.  */
921 first_stmt_code
= rhs_code
;
923 /* Shift arguments should be equal in all the packed stmts for a
924 vector shift with scalar shift operand. */
925 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
926 || rhs_code
== LROTATE_EXPR
927 || rhs_code
== RROTATE_EXPR
)
929 vec_mode
= TYPE_MODE (vectype
);
931 /* First see if we have a vector/vector shift. */
932 optab
= optab_for_tree_code (rhs_code
, vectype
,
936 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
938 /* No vector/vector shift, try for a vector/scalar shift. */
939 optab
= optab_for_tree_code (rhs_code
, vectype
,
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
946 "Build SLP failed: no optab.\n");
947 /* Fatal mismatch. */
951 icode
= (int) optab_handler (optab
, vec_mode
);
952 if (icode
== CODE_FOR_nothing
)
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
957 "op not supported by target.\n");
958 /* Fatal mismatch. */
962 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
963 if (!VECTOR_MODE_P (optab_op2_mode
))
965 need_same_oprnds
= true;
966 first_op1
= gimple_assign_rhs2 (stmt
);
970 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
972 need_same_oprnds
= true;
973 first_op1
= gimple_assign_rhs2 (stmt
);
976 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
978 need_same_oprnds
= true;
979 first_op1
= gimple_call_arg (call_stmt
, 1);
/* Subsequent stmts: allow a single alternate code (two-operator
   node), complex real/imag pairs, and mixed grouped-access refs.  */
984 if (first_stmt_code
!= rhs_code
985 && alt_stmt_code
== ERROR_MARK
)
986 alt_stmt_code
= rhs_code
;
987 if (first_stmt_code
!= rhs_code
988 && (first_stmt_code
!= IMAGPART_EXPR
989 || rhs_code
!= REALPART_EXPR
)
990 && (first_stmt_code
!= REALPART_EXPR
991 || rhs_code
!= IMAGPART_EXPR
)
992 /* Handle mismatches in plus/minus by computing both
993 and merging the results. */
994 && !((first_stmt_code
== PLUS_EXPR
995 || first_stmt_code
== MINUS_EXPR
)
996 && (alt_stmt_code
== PLUS_EXPR
997 || alt_stmt_code
== MINUS_EXPR
)
998 && rhs_code
== alt_stmt_code
)
999 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1000 && (first_stmt_code
== ARRAY_REF
1001 || first_stmt_code
== BIT_FIELD_REF
1002 || first_stmt_code
== INDIRECT_REF
1003 || first_stmt_code
== COMPONENT_REF
1004 || first_stmt_code
== MEM_REF
)))
1006 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1009 "Build SLP failed: different operation "
1010 "in stmt %G", stmt
);
1011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1012 "original stmt %G", first_stmt_info
->stmt
);
1018 if (need_same_oprnds
)
1020 tree other_op1
= (call_stmt
1021 ? gimple_call_arg (call_stmt
, 1)
1022 : gimple_assign_rhs2 (stmt
));
1023 if (!operand_equal_p (first_op1
, other_op1
, 0))
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1027 "Build SLP failed: different shift "
1028 "arguments in %G", stmt
);
1034 if (!load_p
&& rhs_code
== CALL_EXPR
)
1036 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
1037 as_a
<gcall
*> (stmt
)))
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1041 "Build SLP failed: different calls in %G",
1049 /* Grouped store or load. */
1050 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1052 if (REFERENCE_CLASS_P (lhs
))
1060 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1061 if (prev_first_load
)
1063 /* Check that there are no loads from different interleaving
1064 chains in the same node. */
1065 if (prev_first_load
!= first_load
)
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1070 "Build SLP failed: different "
1071 "interleaving chains in one node %G",
1078 prev_first_load
= first_load
;
1080 } /* Grouped access. */
1085 /* Not grouped load. */
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1088 "Build SLP failed: not grouped load %G", stmt
);
1090 /* FORNOW: Not grouped loads are not supported. */
1091 /* Fatal mismatch. */
1096 /* Not memory operation. */
1097 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1098 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1099 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1100 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1101 && rhs_code
!= VIEW_CONVERT_EXPR
1102 && rhs_code
!= CALL_EXPR
)
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1106 "Build SLP failed: operation unsupported %G",
1108 /* Fatal mismatch. */
/* COND_EXPRs may match after swapping or inverting the comparison;
   record which transformation makes stmt I isomorphic.  */
1113 if (rhs_code
== COND_EXPR
)
1115 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1116 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1117 enum tree_code swap_code
= ERROR_MARK
;
1118 enum tree_code invert_code
= ERROR_MARK
;
1121 first_cond_code
= TREE_CODE (cond_expr
);
1122 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1124 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1125 swap_code
= swap_tree_comparison (cond_code
);
1126 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1129 if (first_cond_code
== cond_code
)
1131 /* Isomorphic can be achieved by swapping. */
1132 else if (first_cond_code
== swap_code
)
1134 /* Isomorphic can be achieved by inverting. */
1135 else if (first_cond_code
== invert_code
)
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1141 "Build SLP failed: different"
1142 " operation %G", stmt
);
1152 for (i
= 0; i
< group_size
; ++i
)
1156 /* If we allowed a two-operation SLP node verify the target can cope
1157 with the permute we are going to use. */
1158 if (alt_stmt_code
!= ERROR_MARK
1159 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1161 if (!vect_two_operations_perm_ok_p (stmts
, group_size
,
1162 vectype
, alt_stmt_code
))
1164 for (i
= 0; i
< group_size
; ++i
)
1165 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1168 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1171 "Build SLP failed: different operation "
1172 "in stmt %G", stmts
[i
]->stmt
);
1173 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1174 "original stmt %G", first_stmt_info
->stmt
);
1179 *two_operators
= true;
1185 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1186 Note we never remove apart from at destruction time so we do not
1187 need a special value for deleted that differs from empty. */
/* NOTE(review): partial extraction -- the struct header, return types of
   the out-of-line members and their braces/returns are missing from this
   view.  Keys are vec<stmt_vec_info>; an unallocated vec doubles as the
   empty/deleted sentinel.  */
1190 typedef vec
<stmt_vec_info
> value_type
;
1191 typedef vec
<stmt_vec_info
> compare_type
;
1192 static inline hashval_t
hash (value_type
);
1193 static inline bool equal (value_type existing
, value_type candidate
);
1194 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1195 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1196 static const bool empty_zero_p
= true;
1197 static inline void mark_empty (value_type
&x
) { x
.release (); }
1198 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1199 static inline void remove (value_type
&x
) { x
.release (); }
/* Hash a stmt vector by folding in each stmt's UID.  */
1202 bst_traits::hash (value_type x
)
1205 for (unsigned i
= 0; i
< x
.length (); ++i
)
1206 h
.add_int (gimple_uid (x
[i
]->stmt
));
/* Two keys are equal iff they contain the same stmts in order.  */
1210 bst_traits::equal (value_type existing
, value_type candidate
)
1212 if (existing
.length () != candidate
.length ())
1214 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1215 if (existing
[i
] != candidate
[i
])
/* Map from a scalar stmt set to the SLP tree built for it (or NULL for
   a recorded build failure).  */
1220 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1221 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1222 scalar_stmts_to_slp_tree_map_t
;
1225 vect_build_slp_tree_2 (vec_info
*vinfo
,
1226 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1227 poly_uint64
*max_nunits
,
1228 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1229 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1232 vect_build_slp_tree (vec_info
*vinfo
,
1233 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1234 poly_uint64
*max_nunits
,
1235 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1236 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1238 if (slp_tree
*leader
= bst_map
->get (stmts
))
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1242 *leader
? "" : "failed ", *leader
);
1245 (*leader
)->refcnt
++;
1246 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1250 poly_uint64 this_max_nunits
= 1;
1251 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1253 matches
, npermutes
, tree_size
, bst_map
);
1256 res
->max_nunits
= this_max_nunits
;
1257 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1258 /* Keep a reference for the bst_map use. */
1261 bst_map
->put (stmts
.copy (), res
);
1265 /* Recursively build an SLP tree starting from NODE.
1266 Fail (and return a value not equal to zero) if def-stmts are not
1267 isomorphic, require data permutation or are of unsupported types of
1268 operation. Otherwise, return 0.
1269 The value returned is the depth in the SLP tree where a mismatch
1273 vect_build_slp_tree_2 (vec_info
*vinfo
,
1274 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1275 poly_uint64
*max_nunits
,
1276 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1277 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1279 unsigned nops
, i
, this_tree_size
= 0;
1280 poly_uint64 this_max_nunits
= *max_nunits
;
1285 stmt_vec_info stmt_info
= stmts
[0];
1286 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1287 nops
= gimple_call_num_args (stmt
);
1288 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1290 nops
= gimple_num_ops (stmt
) - 1;
1291 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1294 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1299 /* If the SLP node is a PHI (induction or reduction), terminate
1301 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1303 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1304 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1305 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1308 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1309 /* Induction from different IVs is not supported. */
1310 if (def_type
== vect_induction_def
)
1312 stmt_vec_info other_info
;
1313 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1314 if (stmt_info
!= other_info
)
1317 else if (def_type
== vect_reduction_def
1318 || def_type
== vect_double_reduction_def
1319 || def_type
== vect_nested_cycle
)
1321 /* Else def types have to match. */
1322 stmt_vec_info other_info
;
1323 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1324 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1330 node
= vect_create_new_slp_node (stmts
);
1335 bool two_operators
= false;
1336 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1337 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1338 &this_max_nunits
, matches
, &two_operators
))
1341 /* If the SLP node is a load, terminate the recursion unless masked. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1343 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1345 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1348 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1353 *max_nunits
= this_max_nunits
;
1355 node
= vect_create_new_slp_node (stmts
);
1356 /* And compute the load permutation. Whether it is actually
1357 a permutation depends on the unrolling factor which is
1359 vec
<unsigned> load_permutation
;
1361 stmt_vec_info load_info
;
1362 load_permutation
.create (group_size
);
1363 stmt_vec_info first_stmt_info
1364 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1365 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1367 int load_place
= vect_get_place_in_interleaving_chain
1368 (load_info
, first_stmt_info
);
1369 gcc_assert (load_place
!= -1);
1370 load_permutation
.safe_push (load_place
);
1372 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
1377 /* Get at the operands, verifying they are compatible. */
1378 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1379 slp_oprnd_info oprnd_info
;
1380 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1382 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1383 stmts
, i
, &oprnds_info
);
1385 matches
[(res
== -1) ? 0 : i
] = false;
1389 for (i
= 0; i
< group_size
; ++i
)
1392 vect_free_oprnd_info (oprnds_info
);
1396 auto_vec
<slp_tree
, 4> children
;
1398 stmt_info
= stmts
[0];
1400 /* Create SLP_TREE nodes for the definition node/s. */
1401 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1404 unsigned old_tree_size
= this_tree_size
;
1407 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1409 /* COND_EXPR have one too many eventually if the condition
1411 gcc_assert (i
== 3 && nops
== 4);
1415 if (oprnd_info
->first_dt
!= vect_internal_def
1416 && oprnd_info
->first_dt
!= vect_reduction_def
1417 && oprnd_info
->first_dt
!= vect_induction_def
)
1419 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1420 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1421 oprnd_info
->ops
= vNULL
;
1422 children
.safe_push (invnode
);
1426 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1427 group_size
, &this_max_nunits
,
1429 &this_tree_size
, bst_map
)) != NULL
)
1431 /* If we have all children of a non-unary child built up from
1432 scalars then just throw that away and build it up this node
1434 if (is_a
<bb_vec_info
> (vinfo
)
1435 && SLP_TREE_CHILDREN (child
).length () > 1
1436 /* ??? Rejecting patterns this way doesn't work. We'd have to
1437 do extra work to cancel the pattern so the uses see the
1439 && !oprnd_info
->any_pattern
)
1441 slp_tree grandchild
;
1443 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1444 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1447 && vect_update_all_shared_vectypes (oprnd_info
->def_stmts
))
1450 this_tree_size
= old_tree_size
;
1451 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1452 vect_free_slp_tree (grandchild
, false);
1453 SLP_TREE_CHILDREN (child
).truncate (0);
1455 if (dump_enabled_p ())
1456 dump_printf_loc (MSG_NOTE
, vect_location
,
1457 "Building parent vector operands from "
1458 "scalars instead\n");
1459 oprnd_info
->def_stmts
= vNULL
;
1460 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1461 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1462 oprnd_info
->ops
= vNULL
;
1464 children
.safe_push (child
);
1469 oprnd_info
->def_stmts
= vNULL
;
1470 children
.safe_push (child
);
1474 /* If the SLP build failed fatally and we analyze a basic-block
1475 simply treat nodes we fail to build as externally defined
1476 (and thus build vectors from the scalar defs).
1477 The cost model will reject outright expensive cases.
1478 ??? This doesn't treat cases where permutation ultimatively
1479 fails (or we don't try permutation below). Ideally we'd
1480 even compute a permutation that will end up with the maximum
1482 if (is_a
<bb_vec_info
> (vinfo
)
1484 /* ??? Rejecting patterns this way doesn't work. We'd have to
1485 do extra work to cancel the pattern so the uses see the
1487 && !is_pattern_stmt_p (stmt_info
)
1488 && !oprnd_info
->any_pattern
1489 && vect_update_all_shared_vectypes (oprnd_info
->def_stmts
))
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_NOTE
, vect_location
,
1493 "Building vector operands from scalars\n");
1495 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1496 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1497 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1498 children
.safe_push (child
);
1499 oprnd_info
->ops
= vNULL
;
1500 oprnd_info
->def_stmts
= vNULL
;
1504 /* If the SLP build for operand zero failed and operand zero
1505 and one can be commutated try that for the scalar stmts
1506 that failed the match. */
1508 /* A first scalar stmt mismatch signals a fatal mismatch. */
1510 /* ??? For COND_EXPRs we can swap the comparison operands
1511 as well as the arms under some constraints. */
1513 && oprnds_info
[1]->first_dt
== vect_internal_def
1514 && is_gimple_assign (stmt_info
->stmt
)
1515 /* Swapping operands for reductions breaks assumptions later on. */
1516 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1517 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1518 /* Do so only if the number of not successful permutes was nor more
1519 than a cut-ff as re-trying the recursive match on
1520 possibly each level of the tree would expose exponential
1524 /* See whether we can swap the matching or the non-matching
1526 bool swap_not_matching
= true;
1529 for (j
= 0; j
< group_size
; ++j
)
1531 if (matches
[j
] != !swap_not_matching
)
1533 stmt_vec_info stmt_info
= stmts
[j
];
1534 /* Verify if we can swap operands of this stmt. */
1535 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1537 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1539 if (!swap_not_matching
)
1541 swap_not_matching
= false;
1546 while (j
!= group_size
);
1548 /* Swap mismatched definition stmts. */
1549 if (dump_enabled_p ())
1550 dump_printf_loc (MSG_NOTE
, vect_location
,
1551 "Re-trying with swapped operands of stmts ");
1552 for (j
= 0; j
< group_size
; ++j
)
1553 if (matches
[j
] == !swap_not_matching
)
1555 std::swap (oprnds_info
[0]->def_stmts
[j
],
1556 oprnds_info
[1]->def_stmts
[j
]);
1557 std::swap (oprnds_info
[0]->ops
[j
],
1558 oprnds_info
[1]->ops
[j
]);
1559 if (dump_enabled_p ())
1560 dump_printf (MSG_NOTE
, "%d ", j
);
1562 if (dump_enabled_p ())
1563 dump_printf (MSG_NOTE
, "\n");
1564 /* And try again with scratch 'matches' ... */
1565 bool *tem
= XALLOCAVEC (bool, group_size
);
1566 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1567 group_size
, &this_max_nunits
,
1569 &this_tree_size
, bst_map
)) != NULL
)
1571 /* If we have all children of a non-unary child built up from
1572 scalars then just throw that away and build it up this node
1574 if (is_a
<bb_vec_info
> (vinfo
)
1575 && SLP_TREE_CHILDREN (child
).length () > 1
1576 /* ??? Rejecting patterns this way doesn't work. We'd have
1577 to do extra work to cancel the pattern so the uses see the
1579 && !oprnd_info
->any_pattern
)
1582 slp_tree grandchild
;
1584 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1585 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1588 && (vect_update_all_shared_vectypes
1589 (oprnd_info
->def_stmts
)))
1592 this_tree_size
= old_tree_size
;
1593 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1594 vect_free_slp_tree (grandchild
, false);
1595 SLP_TREE_CHILDREN (child
).truncate (0);
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE
, vect_location
,
1599 "Building parent vector operands from "
1600 "scalars instead\n");
1601 oprnd_info
->def_stmts
= vNULL
;
1602 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1603 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1604 oprnd_info
->ops
= vNULL
;
1606 children
.safe_push (child
);
1611 oprnd_info
->def_stmts
= vNULL
;
1612 children
.safe_push (child
);
1620 gcc_assert (child
== NULL
);
1621 FOR_EACH_VEC_ELT (children
, j
, child
)
1622 vect_free_slp_tree (child
, false);
1623 vect_free_oprnd_info (oprnds_info
);
1627 vect_free_oprnd_info (oprnds_info
);
1629 *tree_size
+= this_tree_size
+ 1;
1630 *max_nunits
= this_max_nunits
;
1632 node
= vect_create_new_slp_node (stmts
);
1633 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1634 SLP_TREE_CHILDREN (node
).splice (children
);
1638 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1641 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1642 slp_tree node
, hash_set
<slp_tree
> &visited
)
1645 stmt_vec_info stmt_info
;
1649 if (visited
.add (node
))
1652 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1653 dump_user_location_t user_loc
= loc
.get_user_location ();
1654 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u)\n",
1655 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1657 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1660 estimated_poly_value (node
->max_nunits
));
1661 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1662 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1663 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1666 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1667 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1668 dump_printf (metadata
, "%T%s ", op
,
1669 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1670 dump_printf (metadata
, "}\n");
1672 if (SLP_TREE_CHILDREN (node
).is_empty ())
1674 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1675 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1676 dump_printf (dump_kind
, " %p", (void *)child
);
1677 dump_printf (dump_kind
, "\n");
1678 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1679 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1683 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1686 hash_set
<slp_tree
> visited
;
1687 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1690 /* Mark the tree rooted at NODE with PURE_SLP. */
1693 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1696 stmt_vec_info stmt_info
;
1699 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1702 if (visited
.add (node
))
1705 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1706 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1708 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1709 vect_mark_slp_stmts (child
, visited
);
1713 vect_mark_slp_stmts (slp_tree node
)
1715 hash_set
<slp_tree
> visited
;
1716 vect_mark_slp_stmts (node
, visited
);
1719 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1722 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1725 stmt_vec_info stmt_info
;
1728 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1731 if (visited
.add (node
))
1734 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1736 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1737 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1738 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1741 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1742 vect_mark_slp_stmts_relevant (child
, visited
);
1746 vect_mark_slp_stmts_relevant (slp_tree node
)
1748 hash_set
<slp_tree
> visited
;
1749 vect_mark_slp_stmts_relevant (node
, visited
);
1753 /* Rearrange the statements of NODE according to PERMUTATION. */
1756 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1757 vec
<unsigned> permutation
,
1758 hash_set
<slp_tree
> &visited
)
1763 if (visited
.add (node
))
1766 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1767 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1769 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1771 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1772 vec
<stmt_vec_info
> tmp_stmts
;
1773 tmp_stmts
.create (group_size
);
1774 tmp_stmts
.quick_grow (group_size
);
1775 stmt_vec_info stmt_info
;
1776 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1777 tmp_stmts
[permutation
[i
]] = stmt_info
;
1778 SLP_TREE_SCALAR_STMTS (node
).release ();
1779 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1781 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1783 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1785 tmp_ops
.create (group_size
);
1786 tmp_ops
.quick_grow (group_size
);
1788 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1789 tmp_ops
[permutation
[i
]] = op
;
1790 SLP_TREE_SCALAR_OPS (node
).release ();
1791 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1796 /* Attempt to reorder stmts in a reduction chain so that we don't
1797 require any load permutation. Return true if that was possible,
1798 otherwise return false. */
1801 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1803 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1806 slp_tree node
, load
;
1808 /* Compare all the permutation sequences to the first one. We know
1809 that at least one load is permuted. */
1810 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1811 if (!node
->load_permutation
.exists ())
1813 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1815 if (!load
->load_permutation
.exists ())
1817 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1818 if (lidx
!= node
->load_permutation
[j
])
1822 /* Check that the loads in the first sequence are different and there
1823 are no gaps between them. */
1824 auto_sbitmap
load_index (group_size
);
1825 bitmap_clear (load_index
);
1826 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1828 if (lidx
>= group_size
)
1830 if (bitmap_bit_p (load_index
, lidx
))
1833 bitmap_set_bit (load_index
, lidx
);
1835 for (i
= 0; i
< group_size
; i
++)
1836 if (!bitmap_bit_p (load_index
, i
))
1839 /* This permutation is valid for reduction. Since the order of the
1840 statements in the nodes is not important unless they are memory
1841 accesses, we can rearrange the statements in all the nodes
1842 according to the order of the loads. */
1843 hash_set
<slp_tree
> visited
;
1844 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1845 node
->load_permutation
, visited
);
1847 /* We are done, no actual permutations need to be generated. */
1848 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1849 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1851 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1852 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1853 /* But we have to keep those permutations that are required because
1854 of handling of gaps. */
1855 if (known_eq (unrolling_factor
, 1U)
1856 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1857 && DR_GROUP_GAP (first_stmt_info
) == 0))
1858 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1860 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1861 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1867 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1870 vect_gather_slp_loads (slp_instance inst
, slp_tree node
,
1871 hash_set
<slp_tree
> &visited
)
1873 if (visited
.add (node
))
1876 if (SLP_TREE_CHILDREN (node
).length () == 0)
1878 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1880 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1881 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1882 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1883 SLP_INSTANCE_LOADS (inst
).safe_push (node
);
1889 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1890 vect_gather_slp_loads (inst
, child
, visited
);
1895 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1897 hash_set
<slp_tree
> visited
;
1898 vect_gather_slp_loads (inst
, node
, visited
);
1901 /* Check if the required load permutations in the SLP instance
1902 SLP_INSTN are supported. */
1905 vect_supported_load_permutation_p (slp_instance slp_instn
)
1907 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1908 unsigned int i
, j
, k
, next
;
1911 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1914 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1915 if (node
->load_permutation
.exists ())
1916 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1917 dump_printf (MSG_NOTE
, "%d ", next
);
1919 for (k
= 0; k
< group_size
; ++k
)
1920 dump_printf (MSG_NOTE
, "%d ", k
);
1921 dump_printf (MSG_NOTE
, "\n");
1924 /* In case of reduction every load permutation is allowed, since the order
1925 of the reduction statements is not important (as opposed to the case of
1926 grouped stores). The only condition we need to check is that all the
1927 load nodes are of the same size and have the same permutation (and then
1928 rearrange all the nodes of the SLP instance according to this
1931 /* Check that all the load nodes are of the same size. */
1932 /* ??? Can't we assert this? */
1933 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1934 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1937 node
= SLP_INSTANCE_TREE (slp_instn
);
1938 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1940 /* Reduction (there are no data-refs in the root).
1941 In reduction chain the order of the loads is not important. */
1942 if (!STMT_VINFO_DATA_REF (stmt_info
)
1943 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1944 vect_attempt_slp_rearrange_stmts (slp_instn
);
1946 /* In basic block vectorization we allow any subchain of an interleaving
1948 FORNOW: not supported in loop SLP because of realignment compications. */
1949 if (STMT_VINFO_BB_VINFO (stmt_info
))
1951 /* Check whether the loads in an instance form a subchain and thus
1952 no permutation is necessary. */
1953 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1955 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1957 bool subchain_p
= true;
1958 stmt_vec_info next_load_info
= NULL
;
1959 stmt_vec_info load_info
;
1960 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1963 && (next_load_info
!= load_info
1964 || DR_GROUP_GAP (load_info
) != 1))
1969 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1972 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1975 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1976 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1977 unsigned HOST_WIDE_INT nunits
;
1978 unsigned k
, maxk
= 0;
1979 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1982 /* In BB vectorization we may not actually use a loaded vector
1983 accessing elements in excess of DR_GROUP_SIZE. */
1984 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1985 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1986 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1988 if (dump_enabled_p ())
1989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1990 "BB vectorization with gaps at the end of "
1991 "a load is not supported\n");
1995 /* Verify the permutation can be generated. */
1998 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1999 1, slp_instn
, true, &n_perms
))
2001 if (dump_enabled_p ())
2002 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
2004 "unsupported load permutation\n");
2012 /* For loop vectorization verify we can generate the permutation. Be
2013 conservative about the vectorization factor, there are permutations
2014 that will use three vector inputs only starting from a specific factor
2015 and the vectorization factor is not yet final.
2016 ??? The SLP instance unrolling factor might not be the maximum one. */
2019 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
2020 LOOP_VINFO_VECT_FACTOR
2021 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
2022 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
2023 if (node
->load_permutation
.exists ()
2024 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
2025 slp_instn
, true, &n_perms
))
2032 /* Find the last store in SLP INSTANCE. */
2035 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
2037 stmt_vec_info last
= NULL
;
2038 stmt_vec_info stmt_vinfo
;
2040 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
2042 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
2043 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
2049 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2050 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2051 (also containing the first GROUP1_SIZE stmts, since stores are
2052 consecutive), the second containing the remainder.
2053 Return the first stmt in the second group. */
2055 static stmt_vec_info
2056 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
2058 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
2059 gcc_assert (group1_size
> 0);
2060 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
2061 gcc_assert (group2_size
> 0);
2062 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
2064 stmt_vec_info stmt_info
= first_vinfo
;
2065 for (unsigned i
= group1_size
; i
> 1; i
--)
2067 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2068 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2070 /* STMT is now the last element of the first group. */
2071 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2072 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
2074 DR_GROUP_SIZE (group2
) = group2_size
;
2075 for (stmt_info
= group2
; stmt_info
;
2076 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
2078 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
2079 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2082 /* For the second group, the DR_GROUP_GAP is that before the original group,
2083 plus skipping over the first vector. */
2084 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
2086 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2087 DR_GROUP_GAP (first_vinfo
) += group2_size
;
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
2091 group1_size
, group2_size
);
2096 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2097 statements and a vector of NUNITS elements. */
2100 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
2102 return exact_div (common_multiple (nunits
, group_size
), group_size
);
2105 /* Analyze an SLP instance starting from a group of grouped stores. Call
2106 vect_build_slp_tree to build a tree of packed stmts if possible.
2107 Return FALSE if it's impossible to SLP any stmt in the loop. */
2110 vect_analyze_slp_instance (vec_info
*vinfo
,
2111 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2112 stmt_vec_info stmt_info
, unsigned max_tree_size
)
2114 slp_instance new_instance
;
2116 unsigned int group_size
;
2117 tree vectype
, scalar_type
= NULL_TREE
;
2119 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2120 vec
<stmt_vec_info
> scalar_stmts
;
2121 bool constructor
= false;
2123 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2125 scalar_type
= TREE_TYPE (DR_REF (dr
));
2126 group_size
= DR_GROUP_SIZE (stmt_info
);
2127 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
2129 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2131 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2132 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2133 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2135 else if (is_gimple_assign (stmt_info
->stmt
)
2136 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2138 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2139 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2144 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2145 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2146 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2151 if (dump_enabled_p ())
2152 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2153 "Build SLP failed: unsupported data-type %T\n",
2158 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2160 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2161 scalar_stmts
.create (group_size
);
2162 stmt_vec_info next_info
= stmt_info
;
2163 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2165 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2168 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2169 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2172 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2174 /* Collect the reduction stmts and store them in
2175 SLP_TREE_SCALAR_STMTS. */
2178 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2179 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2181 /* Mark the first element of the reduction chain as reduction to properly
2182 transform the node. In the reduction analysis phase only the last
2183 element of the chain is marked as reduction. */
2184 STMT_VINFO_DEF_TYPE (stmt_info
)
2185 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2186 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2187 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2189 else if (constructor
)
2191 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2193 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2195 if (TREE_CODE (val
) == SSA_NAME
)
2197 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2198 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2199 /* Value is defined in another basic block. */
2202 scalar_stmts
.safe_push (def_info
);
2207 if (dump_enabled_p ())
2208 dump_printf_loc (MSG_NOTE
, vect_location
,
2209 "Analyzing vectorizable constructor: %G\n",
2214 /* Collect reduction statements. */
2215 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2216 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2217 scalar_stmts
.safe_push (next_info
);
2220 /* Build the tree for the SLP instance. */
2221 bool *matches
= XALLOCAVEC (bool, group_size
);
2222 unsigned npermutes
= 0;
2223 poly_uint64 max_nunits
= nunits
;
2224 unsigned tree_size
= 0;
2225 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2226 &max_nunits
, matches
, &npermutes
,
2227 &tree_size
, bst_map
);
2230 /* Calculate the unrolling factor based on the smallest type. */
2231 poly_uint64 unrolling_factor
2232 = calculate_unrolling_factor (max_nunits
, group_size
);
2234 if (maybe_ne (unrolling_factor
, 1U)
2235 && is_a
<bb_vec_info
> (vinfo
))
2237 unsigned HOST_WIDE_INT const_max_nunits
;
2238 if (!max_nunits
.is_constant (&const_max_nunits
)
2239 || const_max_nunits
> group_size
)
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2243 "Build SLP failed: store group "
2244 "size not a multiple of the vector size "
2245 "in basic block SLP\n");
2246 vect_free_slp_tree (node
, false);
2249 /* Fatal mismatch. */
2250 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2251 vect_free_slp_tree (node
, false);
2255 /* Create a new SLP instance. */
2256 new_instance
= XNEW (class _slp_instance
);
2257 SLP_INSTANCE_TREE (new_instance
) = node
;
2258 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2259 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2260 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2261 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2263 vect_gather_slp_loads (new_instance
, node
);
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_NOTE
, vect_location
,
2266 "SLP size %u vs. limit %u.\n",
2267 tree_size
, max_tree_size
);
2269 /* Compute the load permutation. */
2271 bool loads_permuted
= false;
2272 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2274 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2277 stmt_vec_info load_info
;
2278 bool this_load_permuted
= false;
2279 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2280 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2281 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2282 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2284 this_load_permuted
= true;
2287 if (!this_load_permuted
2288 /* The load requires permutation when unrolling exposes
2289 a gap either because the group is larger than the SLP
2290 group-size or because there is a gap between the groups. */
2291 && (known_eq (unrolling_factor
, 1U)
2292 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2293 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2295 SLP_TREE_LOAD_PERMUTATION (load_node
).release ();
2298 loads_permuted
= true;
2303 if (!vect_supported_load_permutation_p (new_instance
))
2305 if (dump_enabled_p ())
2306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2307 "Build SLP failed: unsupported load "
2308 "permutation %G", stmt_info
->stmt
);
2309 vect_free_slp_instance (new_instance
, false);
2314 /* If the loads and stores can be handled with load/store-lan
2315 instructions do not generate this SLP instance. */
2316 if (is_a
<loop_vec_info
> (vinfo
)
2318 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2321 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2323 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2324 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2325 /* Use SLP for strided accesses (or if we can't load-lanes). */
2326 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2327 || ! vect_load_lanes_supported
2328 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2329 DR_GROUP_SIZE (stmt_vinfo
), false))
2332 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2334 if (dump_enabled_p ())
2335 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2336 "Built SLP cancelled: can use "
2337 "load/store-lanes\n");
2338 vect_free_slp_instance (new_instance
, false);
2343 /* If this is a reduction chain with a conversion in front
2344 amend the SLP tree with a node for that. */
2346 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2347 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2349 /* Get at the conversion stmt - we know it's the single use
2350 of the last stmt of the reduction chain. */
2351 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2352 use_operand_p use_p
;
2354 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2357 next_info
= vinfo
->lookup_stmt (use_stmt
);
2358 next_info
= vect_stmt_to_vectorize (next_info
);
2359 scalar_stmts
= vNULL
;
2360 scalar_stmts
.create (group_size
);
2361 for (unsigned i
= 0; i
< group_size
; ++i
)
2362 scalar_stmts
.quick_push (next_info
);
2363 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
);
2364 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2365 SLP_INSTANCE_TREE (new_instance
) = conv
;
2366 /* We also have to fake this conversion stmt as SLP reduction
2367 group so we don't have to mess with too much code
2369 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2370 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2373 vinfo
->slp_instances
.safe_push (new_instance
);
2375 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_NOTE
, vect_location
,
2378 "Final SLP tree for instance:\n");
2379 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2387 /* Failed to SLP. */
2388 /* Free the allocated memory. */
2389 scalar_stmts
.release ();
2392 /* For basic block SLP, try to break the group up into multiples of the
2394 unsigned HOST_WIDE_INT const_nunits
;
2395 if (is_a
<bb_vec_info
> (vinfo
)
2396 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2397 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2398 && nunits
.is_constant (&const_nunits
))
2400 /* We consider breaking the group only on VF boundaries from the existing
2402 for (i
= 0; i
< group_size
; i
++)
2403 if (!matches
[i
]) break;
2405 if (i
>= const_nunits
&& i
< group_size
)
2407 /* Split into two groups at the first vector boundary before i. */
2408 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2409 unsigned group1_size
= i
& ~(const_nunits
- 1);
2411 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2413 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2415 /* If the first non-match was in the middle of a vector,
2416 skip the rest of that vector. */
2417 if (group1_size
< i
)
2419 i
= group1_size
+ const_nunits
;
2421 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2424 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2425 rest
, max_tree_size
);
2428 /* Even though the first vector did not all match, we might be able to SLP
2429 (some) of the remainder. FORNOW ignore this possibility. */
2436 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2437 trees of packed scalar stmts if SLP is possible. */
2440 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2443 stmt_vec_info first_element
;
2445 DUMP_VECT_SCOPE ("vect_analyze_slp");
2447 scalar_stmts_to_slp_tree_map_t
*bst_map
2448 = new scalar_stmts_to_slp_tree_map_t ();
2450 /* Find SLP sequences starting from groups of grouped stores. */
2451 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2452 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
, max_tree_size
);
2454 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2456 if (loop_vinfo
->reduction_chains
.length () > 0)
2458 /* Find SLP sequences starting from reduction chains. */
2459 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2460 if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2463 /* Dissolve reduction chain group. */
2464 stmt_vec_info vinfo
= first_element
;
2465 stmt_vec_info last
= NULL
;
2468 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2469 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2470 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2474 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2475 /* It can be still vectorized as part of an SLP reduction. */
2476 loop_vinfo
->reductions
.safe_push (last
);
2480 /* Find SLP sequences starting from groups of reductions. */
2481 if (loop_vinfo
->reductions
.length () > 1)
2482 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2486 /* The map keeps a reference on SLP nodes built, release that. */
2487 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2488 it
!= bst_map
->end (); ++it
)
2490 vect_free_slp_tree ((*it
).second
, false);
2493 return opt_result::success ();
2497 /* For each possible SLP instance decide whether to SLP it and calculate overall
2498 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2499 least one instance. */
2502 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2505 poly_uint64 unrolling_factor
= 1;
2506 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2507 slp_instance instance
;
2508 int decided_to_slp
= 0;
2510 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2512 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2514 /* FORNOW: SLP if you can. */
2515 /* All unroll factors have the form:
2517 GET_MODE_SIZE (vinfo->vector_mode) * X
2519 for some rational X, so they must have a common multiple. */
2521 = force_common_multiple (unrolling_factor
,
2522 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2524 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2525 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2526 loop-based vectorization. Such stmts will be marked as HYBRID. */
2527 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2531 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2533 if (decided_to_slp
&& dump_enabled_p ())
2535 dump_printf_loc (MSG_NOTE
, vect_location
,
2536 "Decided to SLP %d instances. Unrolling factor ",
2538 dump_dec (MSG_NOTE
, unrolling_factor
);
2539 dump_printf (MSG_NOTE
, "\n");
2542 return (decided_to_slp
> 0);
2546 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2547 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2550 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
,
2551 hash_map
<slp_tree
, unsigned> &visited
)
2553 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2554 imm_use_iterator imm_iter
;
2556 stmt_vec_info use_vinfo
;
2558 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2561 /* We need to union stype over the incoming graph edges but we still
2562 want to limit recursion to stay O(N+E). */
2563 unsigned visited_cnt
= ++visited
.get_or_insert (node
);
2564 gcc_assert (visited_cnt
<= node
->refcnt
);
2565 bool only_edge
= (visited_cnt
!= node
->refcnt
);
2567 /* Propagate hybrid down the SLP tree. */
2568 if (stype
== hybrid
)
2570 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2572 else if (!only_edge
)
2574 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2575 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2576 /* If we get a pattern stmt here we have to use the LHS of the
2577 original stmt for immediate uses. */
2578 gimple
*stmt
= vect_orig_stmt (stmt_vinfo
)->stmt
;
2580 if (gimple_code (stmt
) == GIMPLE_PHI
)
2581 def
= gimple_phi_result (stmt
);
2583 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2585 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2587 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2590 use_vinfo
= vect_stmt_to_vectorize (use_vinfo
);
2591 if (!STMT_SLP_TYPE (use_vinfo
)
2592 && (STMT_VINFO_RELEVANT (use_vinfo
)
2593 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2594 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2595 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2599 "def in non-SLP stmt: %G", use_stmt
);
2606 && !HYBRID_SLP_STMT (stmt_vinfo
))
2608 if (dump_enabled_p ())
2609 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2611 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2615 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2616 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
2617 && SLP_TREE_DEF_TYPE (child
) != vect_constant_def
)
2618 vect_detect_hybrid_slp_stmts (child
, i
, stype
, visited
);
2621 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2624 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2626 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2627 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2632 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2633 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2637 def_stmt_info
->stmt
);
2638 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2645 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2648 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2649 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2650 /* If the stmt is in a SLP instance then this isn't a reason
2651 to mark use definitions in other SLP instances as hybrid. */
2652 if (! STMT_SLP_TYPE (use_vinfo
)
2653 && (STMT_VINFO_RELEVANT (use_vinfo
)
2654 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2655 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2656 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2663 /* Find stmts that must be both vectorized and SLPed. */
2666 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2669 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2670 slp_instance instance
;
2672 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2674 /* First walk all pattern stmt in the loop and mark defs of uses as
2675 hybrid because immediate uses in them are not recorded. */
2676 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2678 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2679 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2682 gimple
*stmt
= gsi_stmt (gsi
);
2683 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2684 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2687 memset (&wi
, 0, sizeof (wi
));
2688 wi
.info
= loop_vinfo
;
2689 gimple_stmt_iterator gsi2
2690 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
2691 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2692 vect_detect_hybrid_slp_1
, &wi
);
2693 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2694 vect_detect_hybrid_slp_2
,
2695 vect_detect_hybrid_slp_1
, &wi
);
2700 /* Then walk the SLP instance trees marking stmts with uses in
2701 non-SLP stmts as hybrid, also propagating hybrid down the
2702 SLP tree, collecting the above info on-the-fly. */
2703 for (unsigned j
= 0;; ++j
)
2705 hash_map
<slp_tree
, unsigned> visited
;
2707 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2708 if (j
< SLP_INSTANCE_GROUP_SIZE (instance
))
2711 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2712 j
, pure_slp
, visited
);
2720 /* Initialize a bb_vec_info struct for the statements between
2721 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2723 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2724 gimple_stmt_iterator region_end_in
,
2725 vec_info_shared
*shared
)
2726 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2727 bb (gsi_bb (region_begin_in
)),
2728 region_begin (region_begin_in
),
2729 region_end (region_end_in
)
2731 gimple_stmt_iterator gsi
;
2733 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2736 gimple
*stmt
= gsi_stmt (gsi
);
2737 gimple_set_uid (stmt
, 0);
2745 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2746 stmts in the basic block. */
2748 _bb_vec_info::~_bb_vec_info ()
2750 for (gimple_stmt_iterator si
= region_begin
;
2751 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2752 /* Reset region marker. */
2753 gimple_set_uid (gsi_stmt (si
), -1);
2758 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2759 given then that child nodes have already been processed, and that
2760 their def types currently match their SLP node's def type. */
2763 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2764 slp_instance node_instance
,
2765 stmt_vector_for_cost
*cost_vec
)
2767 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2768 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2770 /* Calculate the number of vector statements to be created for the
2771 scalar stmts in this node. For SLP reductions it is equal to the
2772 number of vector statements in the children (which has already been
2773 calculated by the recursive call). Otherwise it is the number of
2774 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2775 VF divided by the number of elements in a vector. */
2776 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2777 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2779 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2780 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2782 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2783 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2790 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2791 vf
= loop_vinfo
->vectorization_factor
;
2794 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2795 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2796 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2797 = vect_get_num_vectors (vf
* group_size
, vectype
);
2801 return vect_analyze_stmt (stmt_info
, &dummy
, node
, node_instance
, cost_vec
);
2804 /* Try to build NODE from scalars, returning true on success.
2805 NODE_INSTANCE is the SLP instance that contains NODE. */
2808 vect_slp_convert_to_external (vec_info
*vinfo
, slp_tree node
,
2809 slp_instance node_instance
)
2811 stmt_vec_info stmt_info
;
2814 if (!is_a
<bb_vec_info
> (vinfo
)
2815 || node
== SLP_INSTANCE_TREE (node_instance
)
2816 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node
)))
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_NOTE
, vect_location
,
2821 "Building vector operands from scalars instead\n");
2823 /* Don't remove and free the child nodes here, since they could be
2824 referenced by other structures. The analysis and scheduling phases
2825 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2826 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
2827 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
2828 SLP_TREE_SCALAR_OPS (node
).safe_grow (group_size
);
2829 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2831 tree lhs
= gimple_get_lhs (vect_orig_stmt (stmt_info
)->stmt
);
2832 SLP_TREE_SCALAR_OPS (node
)[i
] = lhs
;
2837 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2838 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2840 Return true if the operations are supported. */
2843 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2844 slp_instance node_instance
,
2845 hash_set
<slp_tree
> &visited
,
2846 hash_set
<slp_tree
> &lvisited
,
2847 stmt_vector_for_cost
*cost_vec
)
2852 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2855 /* If we already analyzed the exact same set of scalar stmts we're done.
2856 We share the generated vector stmts for those.
2857 The SLP graph is acyclic so not caching whether we failed or succeeded
2858 doesn't result in any issue since we throw away the lvisited set
2860 if (visited
.contains (node
)
2861 || lvisited
.add (node
))
2864 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2865 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2866 visited
, lvisited
, cost_vec
))
2869 /* ??? We have to catch the case late where two first scalar stmts appear
2870 in multiple SLP children with different def type and fail. Remember
2871 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2872 match it when that is vect_internal_def. */
2873 auto_vec
<vect_def_type
, 4> dt
;
2874 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2875 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2876 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2877 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2879 /* Push SLP node def-type to stmt operands. */
2880 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2881 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2882 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2883 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2884 = SLP_TREE_DEF_TYPE (child
);
2886 /* Check everything worked out. */
2888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2889 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2891 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2893 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2894 != SLP_TREE_DEF_TYPE (child
))
2897 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2901 if (!res
&& dump_enabled_p ())
2902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2903 "not vectorized: same operand with different "
2904 "def type in stmt.\n");
2907 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2910 /* Restore def-types. */
2911 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2912 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2913 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2915 /* If this node can't be vectorized, try pruning the tree here rather
2916 than felling the whole thing. */
2917 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
2924 /* Analyze statements in SLP instances of VINFO. Return true if the
2925 operations are supported. */
2928 vect_slp_analyze_operations (vec_info
*vinfo
)
2930 slp_instance instance
;
2933 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2935 hash_set
<slp_tree
> visited
;
2936 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2938 hash_set
<slp_tree
> lvisited
;
2939 stmt_vector_for_cost cost_vec
;
2940 cost_vec
.create (2);
2941 if (!vect_slp_analyze_node_operations (vinfo
,
2942 SLP_INSTANCE_TREE (instance
),
2943 instance
, visited
, lvisited
,
2945 /* Instances with a root stmt require vectorized defs for the
2947 || (SLP_INSTANCE_ROOT_STMT (instance
)
2948 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
2949 != vect_internal_def
)))
2951 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2952 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2953 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_NOTE
, vect_location
,
2955 "removing SLP instance operations starting from: %G",
2957 vect_free_slp_instance (instance
, false);
2958 vinfo
->slp_instances
.ordered_remove (i
);
2959 cost_vec
.release ();
2963 for (hash_set
<slp_tree
>::iterator x
= lvisited
.begin();
2964 x
!= lvisited
.end(); ++x
)
2968 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2969 cost_vec
.release ();
2973 return !vinfo
->slp_instances
.is_empty ();
2977 /* Compute the scalar cost of the SLP node NODE and its children
2978 and return it. Do not account defs that are marked in LIFE and
2979 update LIFE according to uses of NODE. */
2982 vect_bb_slp_scalar_cost (basic_block bb
,
2983 slp_tree node
, vec
<bool, va_heap
> *life
,
2984 stmt_vector_for_cost
*cost_vec
,
2985 hash_set
<slp_tree
> &visited
)
2988 stmt_vec_info stmt_info
;
2991 if (visited
.add (node
))
2994 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2996 gimple
*stmt
= stmt_info
->stmt
;
2997 vec_info
*vinfo
= stmt_info
->vinfo
;
2998 ssa_op_iter op_iter
;
2999 def_operand_p def_p
;
3004 /* If there is a non-vectorized use of the defs then the scalar
3005 stmt is kept live in which case we do not account it or any
3006 required defs in the SLP children in the scalar cost. This
3007 way we make the vectorization more costly when compared to
3009 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
3011 imm_use_iterator use_iter
;
3013 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3014 if (!is_gimple_debug (use_stmt
))
3016 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
3017 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
3020 BREAK_FROM_IMM_USE_STMT (use_iter
);
3027 /* Count scalar stmts only once. */
3028 if (gimple_visited_p (stmt
))
3030 gimple_set_visited (stmt
, true);
3032 vect_cost_for_stmt kind
;
3033 if (STMT_VINFO_DATA_REF (stmt_info
))
3035 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
3038 kind
= scalar_store
;
3040 else if (vect_nop_conversion_p (stmt_info
))
3044 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
3047 auto_vec
<bool, 20> subtree_life
;
3048 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3050 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3052 /* Do not directly pass LIFE to the recursive call, copy it to
3053 confine changes in the callee to the current child/subtree. */
3054 subtree_life
.safe_splice (*life
);
3055 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
,
3057 subtree_life
.truncate (0);
3062 /* Check if vectorization of the basic block is profitable. */
3065 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
3067 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
3068 slp_instance instance
;
3070 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
3071 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
3073 /* Calculate scalar cost. */
3074 stmt_vector_for_cost scalar_costs
;
3075 scalar_costs
.create (0);
3076 hash_set
<slp_tree
> visited
;
3077 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3079 auto_vec
<bool, 20> life
;
3080 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
3081 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
3082 SLP_INSTANCE_TREE (instance
),
3083 &life
, &scalar_costs
, visited
);
3085 void *target_cost_data
= init_cost (NULL
);
3086 add_stmt_costs (target_cost_data
, &scalar_costs
);
3087 scalar_costs
.release ();
3089 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
3090 destroy_cost_data (target_cost_data
);
3092 /* Unset visited flag. */
3093 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
3094 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
3095 gimple_set_visited (gsi_stmt (gsi
), false);
3097 /* Complete the target-specific cost calculation. */
3098 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
3099 &vec_inside_cost
, &vec_epilogue_cost
);
3101 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
3103 if (dump_enabled_p ())
3105 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
3106 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
3108 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
3109 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
3110 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
3113 /* Vectorization is profitable if its cost is more than the cost of scalar
3114 version. Note that we err on the vector side for equal cost because
3115 the cost estimate is otherwise quite pessimistic (constant uses are
3116 free on the scalar side but cost a load on the vector side for
3118 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
3124 /* Find any vectorizable constructors and add them to the grouped_store
3128 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
3130 gimple_stmt_iterator gsi
;
3132 for (gsi
= bb_vinfo
->region_begin
;
3133 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
3135 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
3136 if (!stmt
|| gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
)
3139 tree rhs
= gimple_assign_rhs1 (stmt
);
3140 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
3141 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)),
3142 CONSTRUCTOR_NELTS (rhs
))
3143 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs
, 0)->value
))
3144 || uniform_vector_p (rhs
))
3147 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
3148 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
3152 /* Check if the region described by BB_VINFO can be vectorized, returning
3153 true if so. When returning false, set FATAL to true if the same failure
3154 would prevent vectorization at other vector sizes, false if it is still
3155 worth trying other sizes. N_STMTS is the number of statements in the
3159 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3161 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3163 slp_instance instance
;
3165 poly_uint64 min_vf
= 2;
3167 /* The first group of checks is independent of the vector size. */
3170 /* Analyze the data references. */
3172 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3176 "not vectorized: unhandled data-ref in basic "
3181 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3185 "not vectorized: not enough data-refs in "
3190 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3194 "not vectorized: unhandled data access in "
3199 vect_slp_check_for_constructors (bb_vinfo
);
3201 /* If there are no grouped stores in the region there is no need
3202 to continue with pattern recog as vect_analyze_slp will fail
3204 if (bb_vinfo
->grouped_stores
.is_empty ())
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3208 "not vectorized: no grouped stores in "
3213 /* While the rest of the analysis below depends on it in some way. */
3216 vect_pattern_recog (bb_vinfo
);
3218 /* Check the SLP opportunities in the basic block, analyze and build SLP
3220 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3222 if (dump_enabled_p ())
3224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3225 "Failed to SLP the basic block.\n");
3226 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3227 "not vectorized: failed to find SLP opportunities "
3228 "in basic block.\n");
3233 vect_record_base_alignments (bb_vinfo
);
3235 /* Analyze and verify the alignment of data references and the
3236 dependence in the SLP instances. */
3237 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3239 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
3240 || ! vect_slp_analyze_instance_dependence (instance
))
3242 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3243 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_NOTE
, vect_location
,
3246 "removing SLP instance operations starting from: %G",
3248 vect_free_slp_instance (instance
, false);
3249 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3253 /* Mark all the statements that we want to vectorize as pure SLP and
3255 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3256 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3257 if (SLP_INSTANCE_ROOT_STMT (instance
))
3258 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3262 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3265 if (!vect_slp_analyze_operations (bb_vinfo
))
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3269 "not vectorized: bad operation in basic block.\n");
3273 /* Cost model: check if the vectorization is worthwhile. */
3274 if (!unlimited_cost_model (NULL
)
3275 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3279 "not vectorized: vectorization is not "
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_NOTE
, vect_location
,
3286 "Basic block will be vectorized using SLP\n");
3290 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3291 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3292 on success. The region has N_STMTS statements and has the datarefs
3293 given by DATAREFS. */
3296 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3297 gimple_stmt_iterator region_end
,
3298 vec
<data_reference_p
> datarefs
,
3299 unsigned int n_stmts
)
3301 bb_vec_info bb_vinfo
;
3302 auto_vector_modes vector_modes
;
3304 /* Autodetect first vector size we try. */
3305 machine_mode next_vector_mode
= VOIDmode
;
3306 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3307 unsigned int mode_i
= 0;
3309 vec_info_shared shared
;
3311 machine_mode autodetected_vector_mode
= VOIDmode
;
3314 bool vectorized
= false;
3316 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3318 bool first_time_p
= shared
.datarefs
.is_empty ();
3319 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3321 bb_vinfo
->shared
->save_datarefs ();
3323 bb_vinfo
->shared
->check_datarefs ();
3324 bb_vinfo
->vector_mode
= next_vector_mode
;
3326 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3327 && dbg_cnt (vect_slp
))
3329 if (dump_enabled_p ())
3331 dump_printf_loc (MSG_NOTE
, vect_location
,
3332 "***** Analysis succeeded with vector mode"
3333 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
3334 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3337 bb_vinfo
->shared
->check_datarefs ();
3338 vect_schedule_slp (bb_vinfo
);
3340 unsigned HOST_WIDE_INT bytes
;
3341 if (dump_enabled_p ())
3343 if (GET_MODE_SIZE (bb_vinfo
->vector_mode
).is_constant (&bytes
))
3344 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3345 "basic block part vectorized using %wu byte "
3346 "vectors\n", bytes
);
3348 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3349 "basic block part vectorized using variable "
3350 "length vectors\n");
3357 if (dump_enabled_p ())
3358 dump_printf_loc (MSG_NOTE
, vect_location
,
3359 "***** Analysis failed with vector mode %s\n",
3360 GET_MODE_NAME (bb_vinfo
->vector_mode
));
3364 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
3367 while (mode_i
< vector_modes
.length ()
3368 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_NOTE
, vect_location
,
3372 "***** The result for vector mode %s would"
3374 GET_MODE_NAME (vector_modes
[mode_i
]));
3380 if (mode_i
< vector_modes
.length ()
3381 && VECTOR_MODE_P (autodetected_vector_mode
)
3382 && (related_vector_mode (vector_modes
[mode_i
],
3383 GET_MODE_INNER (autodetected_vector_mode
))
3384 == autodetected_vector_mode
)
3385 && (related_vector_mode (autodetected_vector_mode
,
3386 GET_MODE_INNER (vector_modes
[mode_i
]))
3387 == vector_modes
[mode_i
]))
3389 if (dump_enabled_p ())
3390 dump_printf_loc (MSG_NOTE
, vect_location
,
3391 "***** Skipping vector mode %s, which would"
3392 " repeat the analysis for %s\n",
3393 GET_MODE_NAME (vector_modes
[mode_i
]),
3394 GET_MODE_NAME (autodetected_vector_mode
));
3399 || mode_i
== vector_modes
.length ()
3400 || autodetected_vector_mode
== VOIDmode
3401 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3402 vector sizes will fail do not bother iterating. */
3406 /* Try the next biggest vector size. */
3407 next_vector_mode
= vector_modes
[mode_i
++];
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_NOTE
, vect_location
,
3410 "***** Re-trying analysis with vector mode %s\n",
3411 GET_MODE_NAME (next_vector_mode
));
3415 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3416 true if anything in the basic-block was vectorized. */
3419 vect_slp_bb (basic_block bb
)
3421 gimple_stmt_iterator gsi
;
3422 bool any_vectorized
= false;
3424 gsi
= gsi_start_bb (bb
);
3425 while (!gsi_end_p (gsi
))
3427 gimple_stmt_iterator region_begin
= gsi
;
3428 vec
<data_reference_p
> datarefs
= vNULL
;
3431 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3433 gimple
*stmt
= gsi_stmt (gsi
);
3434 if (is_gimple_debug (stmt
))
3438 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3439 vect_location
= stmt
;
3441 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3445 /* Skip leading unhandled stmts. */
3446 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3452 gimple_stmt_iterator region_end
= gsi
;
3454 if (insns
> param_slp_max_insns_in_bb
)
3456 if (dump_enabled_p ())
3457 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3458 "not vectorized: too many instructions in "
3461 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3462 any_vectorized
= true;
3464 if (gsi_end_p (region_end
))
3467 /* Skip the unhandled stmt. */
3471 return any_vectorized
;
3475 /* Return 1 if vector type STMT_VINFO is a boolean vector. */
3478 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo
, unsigned op_num
)
3480 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3482 enum vect_def_type dt
;
3484 /* For comparison and COND_EXPR type is chosen depending
3485 on the non-constant other comparison operand. */
3486 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3488 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3489 op
= gimple_assign_rhs1 (stmt
);
3491 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3494 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3497 if (code
== COND_EXPR
)
3499 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3500 tree cond
= gimple_assign_rhs1 (stmt
);
3502 if (TREE_CODE (cond
) == SSA_NAME
)
3505 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3511 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3512 op
= TREE_OPERAND (cond
, 0);
3515 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3518 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3521 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3524 /* Build a variable-length vector in which the elements in ELTS are repeated
3525 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3526 RESULTS and add any new instructions to SEQ.
3528 The approach we use is:
3530 (1) Find a vector mode VM with integer elements of mode IM.
3532 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3533 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3534 from small vectors to IM.
3536 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3538 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3539 correct byte contents.
3541 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3543 We try to find the largest IM for which this sequence works, in order
3544 to cut down on the number of interleaves. */
3547 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3548 vec
<tree
> elts
, unsigned int nresults
,
3551 unsigned int nelts
= elts
.length ();
3552 tree element_type
= TREE_TYPE (vector_type
);
3554 /* (1) Find a vector mode VM with integer elements of mode IM. */
3555 unsigned int nvectors
= 1;
3556 tree new_vector_type
;
3558 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
3559 &nvectors
, &new_vector_type
,
3563 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3564 unsigned int partial_nelts
= nelts
/ nvectors
;
3565 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3567 tree_vector_builder partial_elts
;
3568 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3569 pieces
.quick_grow (nvectors
* 2);
3570 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3572 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3573 ELTS' has mode IM. */
3574 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3575 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3576 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3577 tree t
= gimple_build_vector (seq
, &partial_elts
);
3578 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3579 TREE_TYPE (new_vector_type
), t
);
3581 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3582 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3585 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3586 correct byte contents.
3588 We need to repeat the following operation log2(nvectors) times:
3590 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3591 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3593 However, if each input repeats every N elements and the VF is
3594 a multiple of N * 2, the HI result is the same as the LO. */
3595 unsigned int in_start
= 0;
3596 unsigned int out_start
= nvectors
;
3597 unsigned int hi_start
= nvectors
/ 2;
3598 /* A bound on the number of outputs needed to produce NRESULTS results
3599 in the final iteration. */
3600 unsigned int noutputs_bound
= nvectors
* nresults
;
3601 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3603 noutputs_bound
/= 2;
3604 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3605 for (unsigned int i
= 0; i
< limit
; ++i
)
3608 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3611 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3615 tree output
= make_ssa_name (new_vector_type
);
3616 tree input1
= pieces
[in_start
+ (i
/ 2)];
3617 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3618 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3621 gimple_seq_add_stmt (seq
, stmt
);
3622 pieces
[out_start
+ i
] = output
;
3624 std::swap (in_start
, out_start
);
3627 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3628 results
.reserve (nresults
);
3629 for (unsigned int i
= 0; i
< nresults
; ++i
)
3631 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3632 pieces
[in_start
+ i
]));
3634 results
.quick_push (results
[i
- nvectors
]);
3638 /* For constant and loop invariant defs of SLP_NODE this function returns
3639 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3640 OP_NODE determines the node for the operand containing the scalar
3644 vect_get_constant_vectors (slp_tree slp_node
, unsigned op_num
,
3645 vec
<tree
> *vec_oprnds
)
3647 slp_tree op_node
= SLP_TREE_CHILDREN (slp_node
)[op_num
];
3648 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3649 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3650 unsigned HOST_WIDE_INT nunits
;
3652 unsigned j
, number_of_places_left_in_vector
;
3655 int group_size
= op_node
->ops
.length ();
3656 unsigned int vec_num
, i
;
3657 unsigned number_of_copies
= 1;
3659 tree neutral_op
= NULL
;
3660 gimple_seq ctor_seq
= NULL
;
3661 auto_vec
<tree
, 16> permute_results
;
3663 /* ??? SLP analysis should compute the vector type for the
3664 constant / invariant and store it in the SLP node. */
3665 tree op
= op_node
->ops
[0];
3666 /* Check if vector type is a boolean vector. */
3667 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3668 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3669 && vect_mask_constant_operand_p (stmt_vinfo
, op_num
))
3670 vector_type
= truth_type_for (stmt_vectype
);
3672 vector_type
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), op_node
);
3674 /* ??? For lane-reducing ops we should also have the required number
3675 of vector stmts initialized rather than second-guessing here. */
3676 unsigned int number_of_vectors
;
3677 if (is_gimple_assign (stmt_vinfo
->stmt
)
3678 && (gimple_assign_rhs_code (stmt_vinfo
->stmt
) == SAD_EXPR
3679 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == DOT_PROD_EXPR
3680 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == WIDEN_SUM_EXPR
))
3681 number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3684 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3685 * TYPE_VECTOR_SUBPARTS (stmt_vectype
),
3687 vec_oprnds
->create (number_of_vectors
);
3688 auto_vec
<tree
> voprnds (number_of_vectors
);
3690 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3691 created vectors. It is greater than 1 if unrolling is performed.
3693 For example, we have two scalar operands, s1 and s2 (e.g., group of
3694 strided accesses of size two), while NUNITS is four (i.e., four scalars
3695 of this type can be packed in a vector). The output vector will contain
3696 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3699 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3700 containing the operands.
3702 For example, NUNITS is four as before, and the group size is 8
3703 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3704 {s5, s6, s7, s8}. */
3706 /* When using duplicate_and_interleave, we just need one element for
3707 each scalar statement. */
3708 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3709 nunits
= group_size
;
3711 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3713 number_of_places_left_in_vector
= nunits
;
3715 tree_vector_builder
elts (vector_type
, nunits
, 1);
3716 elts
.quick_grow (nunits
);
3717 bool place_after_defs
= false;
3718 for (j
= 0; j
< number_of_copies
; j
++)
3720 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3722 /* Create 'vect_ = {op0,op1,...,opn}'. */
3723 number_of_places_left_in_vector
--;
3725 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3727 if (CONSTANT_CLASS_P (op
))
3729 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3731 /* Can't use VIEW_CONVERT_EXPR for booleans because
3732 of possibly different sizes of scalar value and
3734 if (integer_zerop (op
))
3735 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3736 else if (integer_onep (op
))
3737 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3742 op
= fold_unary (VIEW_CONVERT_EXPR
,
3743 TREE_TYPE (vector_type
), op
);
3744 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3748 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3750 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3753 = build_all_ones_cst (TREE_TYPE (vector_type
));
3755 = build_zero_cst (TREE_TYPE (vector_type
));
3756 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3757 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3763 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3766 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3769 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3773 elts
[number_of_places_left_in_vector
] = op
;
3774 if (!CONSTANT_CLASS_P (op
))
3776 if (TREE_CODE (orig_op
) == SSA_NAME
3777 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3778 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3779 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3780 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3781 place_after_defs
= true;
3783 if (number_of_places_left_in_vector
== 0)
3786 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3787 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3788 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3791 if (permute_results
.is_empty ())
3792 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3793 elts
, number_of_vectors
,
3795 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3798 gimple_stmt_iterator gsi
;
3799 if (place_after_defs
)
3801 stmt_vec_info last_stmt_info
3802 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3803 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3804 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3808 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3810 if (ctor_seq
!= NULL
)
3812 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3813 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3816 voprnds
.quick_push (init
);
3817 place_after_defs
= false;
3818 number_of_places_left_in_vector
= nunits
;
3820 elts
.new_vector (vector_type
, nunits
, 1);
3821 elts
.quick_grow (nunits
);
3826 /* Since the vectors are created in the reverse order, we should invert
3828 vec_num
= voprnds
.length ();
3829 for (j
= vec_num
; j
!= 0; j
--)
3831 vop
= voprnds
[j
- 1];
3832 vec_oprnds
->quick_push (vop
);
3835 /* In case that VF is greater than the unrolling factor needed for the SLP
3836 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3837 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3838 to replicate the vectors. */
3839 while (number_of_vectors
> vec_oprnds
->length ())
3841 tree neutral_vec
= NULL
;
3846 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3848 vec_oprnds
->quick_push (neutral_vec
);
3852 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3853 vec_oprnds
->quick_push (vop
);
3859 /* Get vectorized definitions from SLP_NODE that contains corresponding
3860 vectorized def-stmts. */
3863 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3865 stmt_vec_info vec_def_stmt_info
;
3868 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3870 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3871 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
));
3875 /* Get N vectorized definitions for SLP_NODE.
3876 If the scalar definitions are loop invariants or constants, collect them and
3877 call vect_get_constant_vectors() to create vector stmts.
3878 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3879 must be stored in the corresponding child of SLP_NODE, and we call
3880 vect_get_slp_vect_defs () to retrieve them. */
3883 vect_get_slp_defs (slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3886 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3888 for (unsigned i
= 0; i
< n
; ++i
)
3890 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3892 vec
<tree
> vec_defs
= vNULL
;
3894 /* For each operand we check if it has vectorized definitions in a child
3895 node or we need to create them (for invariants and constants). */
3896 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3898 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3899 vect_get_slp_vect_defs (child
, &vec_defs
);
3902 vect_get_constant_vectors (slp_node
, i
, &vec_defs
);
3904 vec_oprnds
->quick_push (vec_defs
);
3908 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3909 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3910 permute statements for the SLP node NODE of the SLP instance
3911 SLP_NODE_INSTANCE. */
3914 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3915 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3916 slp_instance slp_node_instance
, bool analyze_only
,
3919 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3920 vec_info
*vinfo
= stmt_info
->vinfo
;
3922 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3923 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3924 unsigned int mask_element
;
3927 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3930 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3932 mode
= TYPE_MODE (vectype
);
3933 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3935 /* Initialize the vect stmts of NODE to properly insert the generated
3938 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3939 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3940 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3942 /* Generate permutation masks for every NODE. Number of masks for each NODE
3943 is equal to GROUP_SIZE.
3944 E.g., we have a group of three nodes with three loads from the same
3945 location in each node, and the vector size is 4. I.e., we have a
3946 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3947 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3948 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3951 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3952 The last mask is illegal since we assume two operands for permute
3953 operation, and the mask element values can't be outside that range.
3954 Hence, the last mask must be converted into {2,5,5,5}.
3955 For the first two permutations we need the first and the second input
3956 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3957 we need the second and the third vectors: {b1,c1,a2,b2} and
3960 int vect_stmts_counter
= 0;
3961 unsigned int index
= 0;
3962 int first_vec_index
= -1;
3963 int second_vec_index
= -1;
3967 vec_perm_builder mask
;
3968 unsigned int nelts_to_build
;
3969 unsigned int nvectors_per_build
;
3970 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3971 && multiple_p (nunits
, group_size
));
3974 /* A single vector contains a whole number of copies of the node, so:
3975 (a) all permutes can use the same mask; and
3976 (b) the permutes only need a single vector input. */
3977 mask
.new_vector (nunits
, group_size
, 3);
3978 nelts_to_build
= mask
.encoded_nelts ();
3979 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3983 /* We need to construct a separate mask for each vector statement. */
3984 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3985 if (!nunits
.is_constant (&const_nunits
)
3986 || !vf
.is_constant (&const_vf
))
3988 mask
.new_vector (const_nunits
, const_nunits
, 1);
3989 nelts_to_build
= const_vf
* group_size
;
3990 nvectors_per_build
= 1;
3993 unsigned int count
= mask
.encoded_nelts ();
3994 mask
.quick_grow (count
);
3995 vec_perm_indices indices
;
3997 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3999 unsigned int iter_num
= j
/ group_size
;
4000 unsigned int stmt_num
= j
% group_size
;
4001 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
4002 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
4005 first_vec_index
= 0;
4010 /* Enforced before the loop when !repeating_p. */
4011 unsigned int const_nunits
= nunits
.to_constant ();
4012 vec_index
= i
/ const_nunits
;
4013 mask_element
= i
% const_nunits
;
4014 if (vec_index
== first_vec_index
4015 || first_vec_index
== -1)
4017 first_vec_index
= vec_index
;
4019 else if (vec_index
== second_vec_index
4020 || second_vec_index
== -1)
4022 second_vec_index
= vec_index
;
4023 mask_element
+= const_nunits
;
4027 if (dump_enabled_p ())
4028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4029 "permutation requires at "
4030 "least three vectors %G",
4032 gcc_assert (analyze_only
);
4036 gcc_assert (mask_element
< 2 * const_nunits
);
4039 if (mask_element
!= index
)
4041 mask
[index
++] = mask_element
;
4043 if (index
== count
&& !noop_p
)
4045 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
4046 if (!can_vec_perm_const_p (mode
, indices
))
4048 if (dump_enabled_p ())
4050 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4052 "unsupported vect permute { ");
4053 for (i
= 0; i
< count
; ++i
)
4055 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
4056 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
4058 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
4060 gcc_assert (analyze_only
);
4071 tree mask_vec
= NULL_TREE
;
4074 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
4076 if (second_vec_index
== -1)
4077 second_vec_index
= first_vec_index
;
4079 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
4081 /* Generate the permute statement if necessary. */
4082 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
4083 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
4084 stmt_vec_info perm_stmt_info
;
4087 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4089 = vect_create_destination_var (gimple_assign_lhs (stmt
),
4091 perm_dest
= make_ssa_name (perm_dest
);
4093 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
4094 first_vec
, second_vec
,
4097 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
4101 /* If mask was NULL_TREE generate the requested
4102 identity transform. */
4103 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
4105 /* Store the vector statement in NODE. */
4106 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
4112 first_vec_index
= -1;
4113 second_vec_index
= -1;
4121 /* Vectorize SLP instance tree in postorder. */
4124 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
)
4126 gimple_stmt_iterator si
;
4127 stmt_vec_info stmt_info
;
4128 unsigned int group_size
;
4133 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4136 /* See if we have already vectorized the node in the graph of the
4138 if (SLP_TREE_VEC_STMTS (node
).exists ())
4141 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4142 vect_schedule_slp_instance (child
, instance
);
4144 /* Push SLP node def-type to stmts. */
4145 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4146 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4148 stmt_vec_info child_stmt_info
;
4149 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4150 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
4153 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
4155 /* VECTYPE is the type of the destination. */
4156 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4157 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4158 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
4160 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
4161 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4163 if (dump_enabled_p ())
4164 dump_printf_loc (MSG_NOTE
, vect_location
,
4165 "------>vectorizing SLP node starting from: %G",
4168 /* Vectorized stmts go before the last scalar stmt which is where
4169 all uses are ready. */
4170 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
4171 si
= gsi_for_stmt (last_stmt_info
->stmt
);
4173 /* Handle two-operation SLP nodes by vectorizing the group with
4174 both operations and then performing a merge. */
4175 bool done_p
= false;
4176 if (SLP_TREE_TWO_OPERATORS (node
))
4178 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4179 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
4180 enum tree_code ocode
= ERROR_MARK
;
4181 stmt_vec_info ostmt_info
;
4182 vec_perm_builder
mask (group_size
, group_size
, 1);
4183 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
4185 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
4186 if (gimple_assign_rhs_code (ostmt
) != code0
)
4188 mask
.quick_push (1);
4189 ocode
= gimple_assign_rhs_code (ostmt
);
4192 mask
.quick_push (0);
4194 if (ocode
!= ERROR_MARK
)
4196 vec
<stmt_vec_info
> v0
;
4197 vec
<stmt_vec_info
> v1
;
4199 tree tmask
= NULL_TREE
;
4200 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4201 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4202 SLP_TREE_VEC_STMTS (node
).truncate (0);
4203 gimple_assign_set_rhs_code (stmt
, ocode
);
4204 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4205 gimple_assign_set_rhs_code (stmt
, code0
);
4206 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4207 SLP_TREE_VEC_STMTS (node
).truncate (0);
4208 tree meltype
= build_nonstandard_integer_type
4209 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4210 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4212 for (j
= 0; j
< v0
.length (); ++j
)
4214 /* Enforced by vect_build_slp_tree, which rejects variable-length
4215 vectors for SLP_TREE_TWO_OPERATORS. */
4216 unsigned int const_nunits
= nunits
.to_constant ();
4217 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4218 for (l
= 0; l
< const_nunits
; ++l
)
4220 if (k
>= group_size
)
4222 tree t
= build_int_cst (meltype
,
4223 mask
[k
++] * const_nunits
+ l
);
4224 melts
.quick_push (t
);
4226 tmask
= melts
.build ();
4228 /* ??? Not all targets support a VEC_PERM_EXPR with a
4229 constant mask that would translate to a vec_merge RTX
4230 (with their vec_perm_const_ok). We can either not
4231 vectorize in that case or let veclower do its job.
4232 Unfortunately that isn't too great and at least for
4233 plus/minus we'd eventually like to match targets
4234 vector addsub instructions. */
4236 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4238 gimple_assign_lhs (v0
[j
]->stmt
),
4239 gimple_assign_lhs (v1
[j
]->stmt
),
4241 SLP_TREE_VEC_STMTS (node
).quick_push
4242 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
4250 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4252 /* Restore stmt def-types. */
4253 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4254 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4256 stmt_vec_info child_stmt_info
;
4257 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4258 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4262 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4263 For loop vectorization this is done in vectorizable_call, but for SLP
4264 it needs to be deferred until end of vect_schedule_slp, because multiple
4265 SLP instances may refer to the same scalar stmt. */
4268 vect_remove_slp_scalar_calls (slp_tree node
, hash_set
<slp_tree
> &visited
)
4271 gimple_stmt_iterator gsi
;
4275 stmt_vec_info stmt_info
;
4277 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4280 if (visited
.add (node
))
4283 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4284 vect_remove_slp_scalar_calls (child
, visited
);
4286 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4288 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4289 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4291 if (is_pattern_stmt_p (stmt_info
)
4292 || !PURE_SLP_STMT (stmt_info
))
4294 lhs
= gimple_call_lhs (stmt
);
4295 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4296 gsi
= gsi_for_stmt (stmt
);
4297 stmt_info
->vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4298 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4303 vect_remove_slp_scalar_calls (slp_tree node
)
4305 hash_set
<slp_tree
> visited
;
4306 vect_remove_slp_scalar_calls (node
, visited
);
4309 /* Vectorize the instance root. */
4312 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
4314 gassign
*rstmt
= NULL
;
4316 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
4318 stmt_vec_info child_stmt_info
;
4321 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4323 tree vect_lhs
= gimple_get_lhs (child_stmt_info
->stmt
);
4324 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4325 if (!useless_type_conversion_p (TREE_TYPE (root_lhs
),
4326 TREE_TYPE (vect_lhs
)))
4327 vect_lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (root_lhs
),
4329 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
4333 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
4335 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
4336 stmt_vec_info child_stmt_info
;
4338 vec
<constructor_elt
, va_gc
> *v
;
4339 vec_alloc (v
, nelts
);
4341 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4343 CONSTRUCTOR_APPEND_ELT (v
,
4345 gimple_get_lhs (child_stmt_info
->stmt
));
4347 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4348 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
4349 tree r_constructor
= build_constructor (rtype
, v
);
4350 rstmt
= gimple_build_assign (lhs
, r_constructor
);
4355 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
4356 gsi_replace (&rgsi
, rstmt
, true);
4359 /* Generate vector code for all SLP instances in the loop/basic block. */
4362 vect_schedule_slp (vec_info
*vinfo
)
4364 vec
<slp_instance
> slp_instances
;
4365 slp_instance instance
;
4368 slp_instances
= vinfo
->slp_instances
;
4369 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4371 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4372 /* Schedule the tree of INSTANCE. */
4373 vect_schedule_slp_instance (node
, instance
);
4375 if (SLP_INSTANCE_ROOT_STMT (instance
))
4376 vectorize_slp_instance_root_stmt (node
, instance
);
4378 if (dump_enabled_p ())
4379 dump_printf_loc (MSG_NOTE
, vect_location
,
4380 "vectorizing stmts using SLP.\n");
4383 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4385 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4386 stmt_vec_info store_info
;
4389 /* Remove scalar call stmts. Do not do this for basic-block
4390 vectorization as not all uses may be vectorized.
4391 ??? Why should this be necessary? DCE should be able to
4392 remove the stmts itself.
4393 ??? For BB vectorization we can as well remove scalar
4394 stmts starting from the SLP tree root if they have no
4396 if (is_a
<loop_vec_info
> (vinfo
))
4397 vect_remove_slp_scalar_calls (root
);
4399 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4400 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4402 if (!STMT_VINFO_DATA_REF (store_info
))
4405 if (SLP_INSTANCE_ROOT_STMT (instance
))
4408 store_info
= vect_orig_stmt (store_info
);
4409 /* Free the attached stmt_vec_info and remove the stmt. */
4410 vinfo
->remove_stmt (store_info
);