/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
#include "dump-context.h"
static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
					  slp_tree, stmt_vector_for_cost *);
/* Initialize a SLP node.  */

_slp_tree::_slp_tree ()
{
  SLP_TREE_SCALAR_STMTS (this) = vNULL;
  SLP_TREE_SCALAR_OPS (this) = vNULL;
  SLP_TREE_VEC_STMTS (this) = vNULL;
  SLP_TREE_VEC_DEFS (this) = vNULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
  SLP_TREE_CHILDREN (this) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
  SLP_TREE_LANE_PERMUTATION (this) = vNULL;
  SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
  SLP_TREE_CODE (this) = ERROR_MARK;
  SLP_TREE_VECTYPE (this) = NULL_TREE;
  SLP_TREE_REPRESENTATIVE (this) = NULL;
  SLP_TREE_REF_COUNT (this) = 1;
  this->max_nunits = 1;
  this->lanes = 0;
}
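/* SLP nodes are reference counted: creation above hands out one
   reference, every additional user bumps SLP_TREE_REF_COUNT, and
   vect_free_slp_tree below only deletes a node once its count drops
   to zero.  */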
/* Tear down a SLP node.  */

_slp_tree::~_slp_tree ()
{
  SLP_TREE_CHILDREN (this).release ();
  SLP_TREE_SCALAR_STMTS (this).release ();
  SLP_TREE_SCALAR_OPS (this).release ();
  SLP_TREE_VEC_STMTS (this).release ();
  SLP_TREE_VEC_DEFS (this).release ();
  SLP_TREE_LOAD_PERMUTATION (this).release ();
  SLP_TREE_LANE_PERMUTATION (this).release ();
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (--SLP_TREE_REF_COUNT (node) != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  delete node;
}
/* Return a location suitable for dumps related to the SLP instance.  */

dump_user_location_t
_slp_instance::location () const
{
  if (root_stmt)
    return root_stmt->stmt;
  else
    return SLP_TREE_SCALAR_STMTS (root)[0]->stmt;
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  instance->subgraph_entries.release ();
  instance->cost_vec.release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
{
  slp_tree node = new _slp_tree;
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0];
  SLP_TREE_LANES (node) = scalar_stmts.length ();
  return node;
}
/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node = new _slp_tree;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_LANES (node) = ops.length ();
  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool any_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}
/* Return true when all lanes in the external or constant NODE have
   the same value.  */

static bool
vect_slp_tree_uniform_p (slp_tree node)
{
  gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def
	      || SLP_TREE_DEF_TYPE (node) == vect_external_def);

  /* Pre-existing vectors.  */
  if (SLP_TREE_SCALAR_OPS (node).is_empty ())
    return false;

  unsigned i;
  tree op, first = NULL_TREE;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
    {
      if (!first)
	first = op;
      else if (!operand_equal_p (first, op, 0))
	return false;
    }

  return true;
}
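/* For example, scalar ops { 5, 5, 5, 5 } are uniform and can become a
   cheap splat, while { x_1, 5, x_1, 5 } are not; a node without scalar
   ops (a pre-existing vector) is never considered uniform.  */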
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
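/* For example, in a chain s0 -> s1 -> s2 where each element has a
   DR_GROUP_GAP of 1 from its predecessor, the returned places are 0, 1
   and 2; if one unused element sat between s1 and s2 (giving s2 a gap
   of 2), the place of s2 would be 3 instead.  */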
/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	{
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	  if (vector_type
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	    {
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
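/* As an illustration, for two V4SI inputs A = { a0, a1, a2, a3 } and
   B = { b0, b1, b2, b3 } (nelts == 4), sel1 encodes { 0, 4, 1, 5 },
   i.e. the interleave-low permute { a0, b0, a1, b1 }, and sel2 encodes
   { 2, 6, 3, 7 }, the interleave-high permute { a2, b2, a3, b3 }.  */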
/* Return true if DTA and DTB match.  */

static bool
vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb)
{
  return (dta == dtb
	  || ((dta == vect_external_def || dta == vect_constant_def)
	      && (dtb == vect_external_def || dtb == vect_constant_def)));
}
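/* Constant and external defs are interchangeable here since either way
   the operand becomes an invariant vector built from scalars outside
   the vectorized region; a lane mix like { 3, x_1, 3, x_2 } therefore
   still counts as matching (and is later demoted to external as a
   whole).  */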
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the father node of this one, return 1; if everything
   is ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	    {
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
	      first_op_idx = 5;
	    }
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (swap != 0);
  gcc_assert (!swapped || first_op_cond);
  enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds);
  for (i = 0; i < number_of_oprnds; i++)
    {
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);
	  return -1;
	}

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

      oprnd_info->def_stmts.quick_push (def_stmt_info);
      oprnd_info->ops.quick_push (oprnd);

      if (first)
	{
	  tree type = TREE_TYPE (oprnd);
	  dt = dts[i];
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
						      type)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }

	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in a SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	      && def_stmt_info)
	    dts[i] = dt = vect_reduction_def;

	  /* Check the types of the definition.  */
	  switch (dt)
	    {
	    case vect_external_def:
	    case vect_constant_def:
	    case vect_internal_def:
	    case vect_reduction_def:
	    case vect_induction_def:
	      break;

	    default:
	      /* FORNOW: Not supported.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: illegal type of def %T\n",
				 oprnd);
	      return -1;
	    }

	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = type;
	}
    }
  if (first)
    return 0;

  /* Now match the operand definition types to that of the first stmt.  */
  for (i = 0; i < number_of_oprnds;)
    {
      oprnd_info = (*oprnds_info)[i];
      dt = dts[i];
      stmt_vec_info def_stmt_info = oprnd_info->def_stmts[stmt_num];
      oprnd = oprnd_info->ops[stmt_num];
      tree type = TREE_TYPE (oprnd);

      if (!types_compatible_p (oprnd_info->first_op_type, type))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: different operand types\n");
	  return 1;
	}

      /* Not first stmt of the group, check that the def-stmt/s match
	 the def-stmt/s of the first stmt.  Allow different definition
	 types for reduction chains: the first stmt must be a
	 vect_reduction_def (a phi node), and the rest
	 end in the reduction chain.  */
      if ((!vect_def_types_match (oprnd_info->first_dt, dt)
	   && !(oprnd_info->first_dt == vect_reduction_def
		&& !STMT_VINFO_DATA_REF (stmt_info)
		&& REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		&& def_stmt_info
		&& !STMT_VINFO_DATA_REF (def_stmt_info)
		&& (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		    == REDUC_GROUP_FIRST_ELEMENT (stmt_info))))
	  || (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && ((!def_stmt_info
		   || STMT_VINFO_DATA_REF (def_stmt_info)
		   || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		       != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		  != (oprnd_info->first_dt != vect_reduction_def))))
	{
	  /* Try swapping operands if we got a mismatch.  For BB
	     vectorization only in case it will clearly improve things.  */
	  if (i == commutative_op && !swapped
	      && (!is_a <bb_vec_info> (vinfo)
		  || (!vect_def_types_match ((*oprnds_info)[i+1]->first_dt,
					     dts[i+1])
		      && (vect_def_types_match (oprnd_info->first_dt, dts[i+1])
			  || vect_def_types_match
			       ((*oprnds_info)[i+1]->first_dt, dts[i])))))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "trying swapped operands\n");
	      std::swap (dts[i], dts[i+1]);
	      std::swap ((*oprnds_info)[i]->def_stmts[stmt_num],
			 (*oprnds_info)[i+1]->def_stmts[stmt_num]);
	      std::swap ((*oprnds_info)[i]->ops[stmt_num],
			 (*oprnds_info)[i+1]->ops[stmt_num]);
	      swapped = true;
	      continue;
	    }

	  if (is_a <bb_vec_info> (vinfo)
	      && !oprnd_info->any_pattern)
	    {
	      /* Now for commutative ops we should see whether we can
		 make the other operand matching.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "treating operand as external\n");
	      oprnd_info->first_dt = dt = vect_external_def;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");
	      return 1;
	    }
	}

      /* Make sure to demote the overall operand to external.  */
      if (dt == vect_external_def)
	oprnd_info->first_dt = vect_external_def;
      /* For a SLP reduction chain we want to duplicate the reduction to
	 each of the chain members.  That gets us a sane SLP graph (still
	 the stmts are not 100% correct wrt the initial values).  */
      else if ((dt == vect_internal_def
		|| dt == vect_reduction_def)
	       && oprnd_info->first_dt == vect_reduction_def
	       && !STMT_VINFO_DATA_REF (stmt_info)
	       && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	       && !STMT_VINFO_DATA_REF (def_stmt_info)
	       && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		   == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	{
	  oprnd_info->def_stmts[stmt_num] = oprnd_info->def_stmts[0];
	  oprnd_info->ops[stmt_num] = oprnd_info->ops[0];
	}

      ++i;
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  return 0;
}
/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype)
    return useless_type_conversion_p (vectype, old_vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (is_pattern_stmt_p (member_info)
	    && !useless_type_conversion_p (vectype,
					   STMT_VINFO_VECTYPE (member_info)))
	  break;
      if (!member_info)
	{
	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
	  return true;
	}
    }
  else if (!is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "    old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "    new vector type: %T\n", vectype);
    }
  return false;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}
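/* For example, four lanes that all call sqrtf with the same prototype
   are compatible and can become a single vector square root, whereas
   mixing sqrtf and cosf (different combined functions) or two calls
   with different fntypes is rejected.  */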
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
			unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators, tree *node_vectype)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool first_stmt_load_p = false, load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable, that may
	 throw or that have volatile operands.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info)
	  || stmt_can_throw_internal (cfun, stmt)
	  || gimple_has_volatile_ops (stmt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* ??? For BB vectorization we want to commutate operands in a way
	     to shuffle all unvectorizable defs into one operand and have
	     the other still vectorized.  The following doesn't reliably
	     work for this though but it's the easiest we can do here.  */
	  if (is_a <bb_vec_info> (vinfo) && i != 0)
	    continue;
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  if (is_a <bb_vec_info> (vinfo) && i != 0)
	    continue;
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
					   &nunits_vectype, group_size)
	  || (nunits_vectype
	      && !vect_record_max_nunits (vinfo, stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  if (is_a <bb_vec_info> (vinfo) && i != 0)
	    continue;
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
	{
	  rhs_code = CALL_EXPR;

	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	    load_p = true;
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			  (gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      if (is_a <bb_vec_info> (vinfo) && i != 0)
		continue;
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	{
	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = gimple_vuse (stmt);
	}

      /* Check the operation.  */
      if (i == 0)
	{
	  *node_vectype = vectype;
	  first_stmt_code = rhs_code;
	  first_stmt_load_p = load_p;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype, optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      if (is_a <bb_vec_info> (vinfo) && i != 0)
			continue;
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      if (is_a <bb_vec_info> (vinfo) && i != 0)
			continue;
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	  else if (!load_p
		   && rhs_code == BIT_FIELD_REF)
	    {
	      tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
	      if (TREE_CODE (vec) != SSA_NAME
		  || !types_compatible_p (vectype, TREE_TYPE (vec)))
		{
		  if (is_a <bb_vec_info> (vinfo) && i != 0)
		    continue;
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: "
				     "BIT_FIELD_REF not supported\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}
	    }
	  else if (call_stmt
		   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_call_arg (call_stmt, 1);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if ((first_stmt_code != rhs_code
	       && (first_stmt_code != IMAGPART_EXPR
		   || rhs_code != REALPART_EXPR)
	       && (first_stmt_code != REALPART_EXPR
		   || rhs_code != IMAGPART_EXPR)
	       /* Handle mismatches in plus/minus by computing both
		  and merging the results.  */
	       && !((first_stmt_code == PLUS_EXPR
		     || first_stmt_code == MINUS_EXPR)
		    && (alt_stmt_code == PLUS_EXPR
			|| alt_stmt_code == MINUS_EXPR)
		    && rhs_code == alt_stmt_code)
	       && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		    && (first_stmt_code == ARRAY_REF
			|| first_stmt_code == BIT_FIELD_REF
			|| first_stmt_code == INDIRECT_REF
			|| first_stmt_code == COMPONENT_REF
			|| first_stmt_code == MEM_REF)))
	      || first_stmt_load_p != load_p)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds)
	    {
	      tree other_op1 = (call_stmt
				? gimple_call_arg (call_stmt, 1)
				: gimple_assign_rhs2 (stmt));
	      if (!operand_equal_p (first_op1, other_op1, 0))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different shift "
				     "arguments in %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	  if (!load_p
	      && first_stmt_code == BIT_FIELD_REF
	      && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info->stmt), 0)
		  != TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different BIT_FIELD_REF "
				 "arguments in %G", stmt);
	      /* Mismatch.  */
	      continue;
	    }

	  if (!load_p && rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }

	  if (!types_compatible_p (vectype, *node_vectype))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different vector type "
				 "in %G", stmt);
	      /* Mismatch.  */
	      continue;
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (load_p)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      if (is_a <bb_vec_info> (vinfo) && i != 0)
		continue;
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR
	      && rhs_code != BIT_FIELD_REF)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      if (is_a <bb_vec_info> (vinfo) && i != 0)
		continue;
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    *two_operators = true;

  return true;
}
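/* As an example of the two-operator case accepted above, the group
     a[0] = b[0] + c[0];  a[1] = b[1] - c[1];
     a[2] = b[2] + c[2];  a[3] = b[3] - c[3];
   records first_stmt_code == PLUS_EXPR and alt_stmt_code == MINUS_EXPR
   and sets *TWO_OPERATORS; vect_build_slp_tree_2 later emits both a
   vector addition and a vector subtraction and blends the lanes with a
   VEC_PERM_EXPR.  */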
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
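/* The map memoizes SLP discovery: the key is the vector of scalar
   stmts (hashed via their gimple_uid), the value the SLP node built
   for them, or NULL if discovery failed.  Looking the same set up
   again, e.g. when one computation feeds two different stores, re-uses
   the earlier result instead of re-running the exponential build.  */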
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);
static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	{
	  SLP_TREE_REF_COUNT (*leader)++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
	}
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					&this_max_nunits,
					matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      SLP_TREE_REF_COUNT (res)++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}
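/* Note both successful and failed builds are entered into BST_MAP
   above; a cached NULL makes a repeated attempt fail immediately,
   while a cached node is handed out with its reference count bumped
   so that the map's own reference survives vect_free_slp_tree.  */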
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return the built node.  MATCHES indicates for each lane whether it
   matched the first one.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt))
    nops = gimple_phi_num_args (phi);
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
						  group_size);
      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
	      return NULL;
	}
      else
	return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts, nops);
      SLP_TREE_VECTYPE (node) = vectype;
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  tree vectype = NULL_TREE;
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators,
			      &vectype))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	{
	  /* Masked load.  */
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
	  nops = 1;
	}
      else
	{
	  *max_nunits = this_max_nunits;
	  (*tree_size)++;
	  node = vect_create_new_slp_node (stmts, 0);
	  SLP_TREE_VECTYPE (node) = vectype;
	  /* And compute the load permutation.  Whether it is actually
	     a permutation depends on the unrolling factor which is
	     decided later.  */
	  vec<unsigned> load_permutation;
	  int j;
	  stmt_vec_info load_info;
	  load_permutation.create (group_size);
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
		  (load_info, first_stmt_info);
	      gcc_assert (load_place != -1);
	      load_permutation.safe_push (load_place);
	    }
	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
	  return node;
	}
    }
  else if (gimple_assign_single_p (stmt_info->stmt)
	   && !gimple_vuse (stmt_info->stmt)
	   && gimple_assign_rhs_code (stmt_info->stmt) == BIT_FIELD_REF)
    {
      /* vect_build_slp_tree_1 determined all BIT_FIELD_REFs reference
	 the same SSA name vector of a compatible type to vectype.  */
      vec<std::pair<unsigned, unsigned> > lperm = vNULL;
      tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0);
      stmt_vec_info estmt_info;
      FOR_EACH_VEC_ELT (stmts, i, estmt_info)
	{
	  gassign *estmt = as_a <gassign *> (estmt_info->stmt);
	  tree bfref = gimple_assign_rhs1 (estmt);
	  unsigned HOST_WIDE_INT lane;
	  if (!known_eq (bit_field_size (bfref),
			 tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype))))
	      || !constant_multiple_p (bit_field_offset (bfref),
				       bit_field_size (bfref), &lane))
	    {
	      lperm.release ();
	      return NULL;
	    }
	  lperm.safe_push (std::make_pair (0, (unsigned)lane));
	}
      slp_tree vnode = vect_create_new_slp_node (vNULL);
      SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
      SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
      /* We are always building a permutation node even if it is an identity
	 permute to shield the rest of the vectorizer from the odd node
	 representing an actual vector without any scalar ops.
	 ??? We could hide it completely with making the permute node
	 external?  */
      node = vect_create_new_slp_node (stmts, 1);
      SLP_TREE_CODE (node) = VEC_PERM_EXPR;
      SLP_TREE_LANE_PERMUTATION (node) = lperm;
      SLP_TREE_VECTYPE (node) = vectype;
      SLP_TREE_CHILDREN (node).quick_push (vnode);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
	{
	  /* COND_EXPR has one operand too many eventually if the condition
	     is a SSA name.  */
	  gcc_assert (i == 3 && nops == 4);
	  continue;
	}

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	{
	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
	  oprnd_info->ops = vNULL;
	  children.safe_push (invnode);
	  continue;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					matches, npermutes,
					&this_tree_size, bst_map)) != NULL)
	{
	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Swapping operands for reductions breaks assumptions later on.  */
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		std::swap (oprnds_info[0]->ops[j],
			   oprnds_info[1]->ops[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	      }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    tem, npermutes,
					    &this_tree_size, bst_map)) != NULL)
	    {
	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }
	  /* We do not undo the swapping here since it might still be
	     the better order for the second operand in case we build
	     the first one from scalars below.  */
	  ++*npermutes;
	}
fail:

      /* If the SLP build failed and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info)
	  && !oprnd_info->any_pattern)
	{
	  /* But if there's a leading vector sized set of matching stmts
	     fail here so we can split the group.  This matches the condition
	     vect_analyze_slp_instance uses.  */
	  /* ??? We might want to split here and combine the results to support
	     multiple vector sizes better.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      break;
	  if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Building vector operands from scalars\n");
	      this_tree_size++;
	      child = vect_create_new_slp_node (oprnd_info->ops);
	      children.safe_push (child);
	      oprnd_info->ops = vNULL;
	      oprnd_info->def_stmts = vNULL;
	      continue;
	    }
	}

      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  /* If all children of this node are built up from uniform scalars, or it
     does more than one possibly expensive vector construction, just throw
     it away, causing the node to be built up from scalars.
     The exception is the SLP node for the vector store.  */
  if (is_a <bb_vec_info> (vinfo)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      /* ??? Rejecting patterns this way doesn't work.  We'd have to
	 do extra work to cancel the pattern so the uses see the
	 scalar version.  */
      && !is_pattern_stmt_p (stmt_info))
    {
      slp_tree child;
      unsigned j;
      bool all_uniform_p = true;
      unsigned n_vector_builds = 0;
      FOR_EACH_VEC_ELT (children, j, child)
	{
	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	    all_uniform_p = false;
	  else if (!vect_slp_tree_uniform_p (child))
	    {
	      all_uniform_p = false;
	      if (SLP_TREE_DEF_TYPE (child) == vect_external_def)
		n_vector_builds++;
	    }
	}
      if (all_uniform_p || n_vector_builds > 1)
	{
	  /* Roll back.  */
	  matches[0] = false;
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building parent vector operands from "
			     "scalars instead\n");
	  return NULL;
	}
    }

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  if (two_operators)
    {
      /* ??? We'd likely want to either cache in bst_map sth like
	 { a+b, NULL, a+b, NULL } and { NULL, a-b, NULL, a-b } or
	 the true { a+b, a+b, a+b, a+b } ... but there we don't have
	 explicit stmts to put in so the keying on 'stmts' doesn't
	 work (but we have the same issue with nodes that use 'ops').  */
      slp_tree one = new _slp_tree;
      slp_tree two = new _slp_tree;
      SLP_TREE_DEF_TYPE (one) = vect_internal_def;
      SLP_TREE_DEF_TYPE (two) = vect_internal_def;
      SLP_TREE_VECTYPE (one) = vectype;
      SLP_TREE_VECTYPE (two) = vectype;
      SLP_TREE_CHILDREN (one).safe_splice (children);
      SLP_TREE_CHILDREN (two).safe_splice (children);
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two), i, child)
	SLP_TREE_REF_COUNT (child)++;

      /* Here we record the original defs since this
	 node represents the final lane configuration.  */
      node = vect_create_new_slp_node (stmts, 2);
      SLP_TREE_VECTYPE (node) = vectype;
      SLP_TREE_CODE (node) = VEC_PERM_EXPR;
      SLP_TREE_CHILDREN (node).quick_push (one);
      SLP_TREE_CHILDREN (node).quick_push (two);
      gassign *stmt = as_a <gassign *> (stmts[0]->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      unsigned j = 0;
      FOR_EACH_VEC_ELT (stmts, i, ostmt_info)
	{
	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
	  if (gimple_assign_rhs_code (ostmt) != code0)
	    {
	      SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (1, i));
	      ocode = gimple_assign_rhs_code (ostmt);
	      j = i;
	    }
	  else
	    SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (0, i));
	}
      SLP_TREE_CODE (one) = code0;
      SLP_TREE_CODE (two) = ocode;
      SLP_TREE_LANES (one) = stmts.length ();
      SLP_TREE_LANES (two) = stmts.length ();
      SLP_TREE_REPRESENTATIVE (one) = stmts[0];
      SLP_TREE_REPRESENTATIVE (two) = stmts[j];
      return node;
    }

  node = vect_create_new_slp_node (stmts, nops);
  SLP_TREE_VECTYPE (node) = vectype;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
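/* For the two-operator group shown before vect_build_slp_tree_1, the
   node built above gets SLP_TREE_CODE VEC_PERM_EXPR with lane
   permutation { 0[0] 1[1] 0[2] 1[3] }: even lanes are taken from the
   all-PLUS child 'one', odd lanes from the all-MINUS child 'two'.  */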
/* Dump a single SLP tree NODE.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  unsigned i, j;
  slp_tree child;
  stmt_vec_info stmt_info;
  tree op;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
		   SLP_TREE_DEF_TYPE (node) == vect_external_def
		   ? " (external)"
		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
		      ? " (constant)"
		      : ""), (void *) node,
		   estimated_poly_value (node->max_nunits),
		   SLP_TREE_REF_COUNT (node));
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
  else
    {
      dump_printf_loc (metadata, user_loc, "\t{ ");
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	dump_printf (metadata, "%T%s ", op,
		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
      dump_printf (metadata, "}\n");
    }
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    {
      dump_printf_loc (metadata, user_loc, "\tload permutation {");
      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
	dump_printf (dump_kind, " %u", j);
      dump_printf (dump_kind, " }\n");
    }
  if (SLP_TREE_LANE_PERMUTATION (node).exists ())
    {
      dump_printf_loc (metadata, user_loc, "\tlane permutation {");
      for (i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
	dump_printf (dump_kind, " %u[%u]",
		     SLP_TREE_LANE_PERMUTATION (node)[i].first,
		     SLP_TREE_LANE_PERMUTATION (node)[i].second);
      dump_printf (dump_kind, " }\n");
    }
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
}

DEBUG_FUNCTION void
debug (slp_tree node)
{
  debug_dump_context ctx;
  vect_print_slp_tree (MSG_NOTE,
		       dump_location_t::from_location_t (UNKNOWN_LOCATION),
		       node);
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
		      slp_tree node, hash_set<slp_tree> &visited)
{
  unsigned i;
  slp_tree child;

  if (visited.add (node))
    return;

  vect_print_slp_tree (dump_kind, loc, node);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_graph (dump_kind, loc, child, visited);
}

static void
vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
		      slp_tree entry)
{
  hash_set<slp_tree> visited;
  vect_print_slp_graph (dump_kind, loc, entry, visited);
}
/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}
/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

static void
vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
		       hash_set<slp_tree> &visited)
{
  if (visited.add (node))
    return;

  if (SLP_TREE_CHILDREN (node).length () == 0)
    {
      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
	return;
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	loads.safe_push (node);
    }
  else
    {
      unsigned i;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
	vect_gather_slp_loads (loads, child, visited);
    }
}

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
}
/* Find the last store in SLP NODE.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Find the first stmt in NODE.  */

stmt_vec_info
vect_find_first_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info first = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      if (!first
	  || get_later_stmt (stmt_vinfo, first) == first)
	first = stmt_vinfo;
    }

  return first;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */

static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
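/* For example, splitting a group of six consecutive stores with
   GROUP1_SIZE == 4 yields a first group { s0 s1 s2 s3 } and a second
   group { s4 s5 }: the second group's DR_GROUP_GAP becomes the original
   leading gap plus 4, and the first group's gap grows by 2, so each
   group still strides over all six elements when vectorized on its
   own.  */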
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
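/* E.g. for a group of 6 stmts and 4-element vectors the factor is
   common_multiple (4, 6) / 6 == 12 / 6 == 2: the SLP instance must be
   unrolled twice so that three full vectors cover two copies of the
   group.  */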
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   scalar_stmts_to_slp_tree_map_t *bst_map,
			   stmt_vec_info stmt_info, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  unsigned int i;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  vec<stmt_vec_info> scalar_stmts;
  bool constructor = false;

  if (is_a <bb_vec_info> (vinfo))
    vect_location = stmt_info->stmt;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      group_size = DR_GROUP_SIZE (stmt_info);
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      group_size = REDUC_GROUP_SIZE (stmt_info);
    }
  else if (is_gimple_assign (stmt_info->stmt)
	   && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
    {
      group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
      constructor = true;
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  stmt_vec_info next_info = stmt_info;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (stmt_info)
	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
      STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
    }
  else if (constructor)
    {
      tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
      tree val;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
	{
	  if (TREE_CODE (val) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (val);
	      stmt_vec_info def_info = vinfo->lookup_stmt (def);
	      /* Value is defined in another basic block.  */
	      if (!def_info)
		return false;
	      def_info = vect_stmt_to_vectorize (def_info);
	      scalar_stmts.safe_push (def_info);
	    }
	  else
	    return false;
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Analyzing vectorizable constructor: %G\n",
			 stmt_info->stmt);
    }
  else
    {
      /* Collect reduction statements.  */
      vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next_info); i++)
	scalar_stmts.safe_push (next_info);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "Starting SLP discovery for\n");
      for (i = 0; i < scalar_stmts.length (); ++i)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "  %G", scalar_stmts[i]->stmt);
    }

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  poly_uint64 max_nunits = 1;
  unsigned tree_size = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, matches, &npermutes,
			      &tree_size, bst_map);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node);
	      return false;
	    }
	  /* Fatal mismatch.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "SLP discovery succeeded but node needs "
			     "splitting\n");
	  memset (matches, true, group_size);
	  matches[group_size / const_max_nunits * const_max_nunits] = false;
	  vect_free_slp_tree (node);
	  return false;
	}

      /* Create a new SLP instance.  */
      new_instance = XNEW (class _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_LOADS (new_instance) = vNULL;
      SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
      new_instance->reduc_phis = NULL;
      new_instance->cost_vec = vNULL;
2158 new_instance
->subgraph_entries
= vNULL
;
2160 vect_gather_slp_loads (new_instance
, node
);
2161 if (dump_enabled_p ())
2162 dump_printf_loc (MSG_NOTE
, vect_location
,
2163 "SLP size %u vs. limit %u.\n",
2164 tree_size
, max_tree_size
);
2166 /* Check whether any load is possibly permuted. */
2168 bool loads_permuted
= false;
2169 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2171 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2174 stmt_vec_info load_info
;
2175 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2176 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2178 loads_permuted
= true;
2183 /* If the loads and stores can be handled with load/store-lane
2184 instructions do not generate this SLP instance. */
2185 if (is_a
<loop_vec_info
> (vinfo
)
2188 && vect_store_lanes_supported
2189 (STMT_VINFO_VECTYPE (scalar_stmts
[0]), group_size
, false))
2192 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2194 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2195 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2196 /* Use SLP for strided accesses (or if we can't load-lanes). */
2197 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2198 || ! vect_load_lanes_supported
2199 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2200 DR_GROUP_SIZE (stmt_vinfo
), false))
2203 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2205 if (dump_enabled_p ())
2206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2207 "Built SLP cancelled: can use "
2208 "load/store-lanes\n");
2209 vect_free_slp_instance (new_instance
);
2214 /* If this is a reduction chain with a conversion in front
2215 amend the SLP tree with a node for that. */
2217 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2218 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2220 /* Get at the conversion stmt - we know it's the single use
2221 of the last stmt of the reduction chain. */
2222 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2223 use_operand_p use_p
;
2225 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2228 next_info
= vinfo
->lookup_stmt (use_stmt
);
2229 next_info
= vect_stmt_to_vectorize (next_info
);
2230 scalar_stmts
= vNULL
;
2231 scalar_stmts
.create (group_size
);
2232 for (unsigned i
= 0; i
< group_size
; ++i
)
2233 scalar_stmts
.quick_push (next_info
);
2234 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
, 1);
2235 SLP_TREE_VECTYPE (conv
) = STMT_VINFO_VECTYPE (next_info
);
2236 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2237 SLP_INSTANCE_TREE (new_instance
) = conv
;
2238 /* We also have to fake this conversion stmt as SLP reduction
2239 group so we don't have to mess with too much code
2241 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2242 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2245 vinfo
->slp_instances
.safe_push (new_instance
);
2247 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2248 the number of scalar stmts in the root in a few places.
2249 Verify that assumption holds. */
2250 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2251 .length () == group_size
);
2253 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_NOTE
, vect_location
,
2256 "Final SLP tree for instance:\n");
2257 vect_print_slp_graph (MSG_NOTE
, vect_location
,
2258 SLP_INSTANCE_TREE (new_instance
));
2266 /* Failed to SLP. */
2267 /* Free the allocated memory. */
2268 scalar_stmts
.release ();
2271 /* Try to break the group up into pieces. */
2272 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2273 && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
2275 for (i
= 0; i
< group_size
; i
++)
2279 /* For basic block SLP, try to break the group up into multiples of
2281 if (is_a
<bb_vec_info
> (vinfo
)
2282 && (i
> 1 && i
< group_size
))
2285 = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
2286 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
2288 unsigned HOST_WIDE_INT const_nunits
;
2290 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
))
2292 /* Split into two groups at the first vector boundary. */
2293 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2294 unsigned group1_size
= i
& ~(const_nunits
- 1);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_NOTE
, vect_location
,
2298 "Splitting SLP group at stmt %u\n", i
);
2299 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2301 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2303 /* If the first non-match was in the middle of a vector,
2304 skip the rest of that vector. Do not bother to re-analyze
2305 single stmt groups. */
2306 if (group1_size
< i
)
2308 i
= group1_size
+ const_nunits
;
2309 if (i
+ 1 < group_size
)
2310 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2312 if (i
+ 1 < group_size
)
2313 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2314 rest
, max_tree_size
);
2319 /* For loop vectorization split into arbitrary pieces of size > 1. */
2320 if (is_a
<loop_vec_info
> (vinfo
)
2321 && (i
> 1 && i
< group_size
))
2323 unsigned group1_size
= i
;
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_NOTE
, vect_location
,
2327 "Splitting SLP group at stmt %u\n", i
);
2329 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2331 /* Loop vectorization cannot handle gaps in stores, make sure
2332 the split group appears as strided. */
2333 STMT_VINFO_STRIDED_P (rest
) = 1;
2334 DR_GROUP_GAP (rest
) = 0;
2335 STMT_VINFO_STRIDED_P (stmt_info
) = 1;
2336 DR_GROUP_GAP (stmt_info
) = 0;
2338 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2340 if (i
+ 1 < group_size
)
2341 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2342 rest
, max_tree_size
);
2347 /* Even though the first vector did not all match, we might be able to SLP
2348 (some) of the remainder. FORNOW ignore this possibility. */
2351 /* Failed to SLP. */
2352 if (dump_enabled_p ())
2353 dump_printf_loc (MSG_NOTE
, vect_location
, "SLP discovery failed\n");
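
/* A sketch of the splitting strategy above (illustrative only): for a BB
   store group of 12 int stores with V4SI (const_nunits == 4) and the
   first mismatch at stmt index i == 6, the group is split at the vector
   boundary into {0..3} and {4..11} and the first part re-analyzed; the
   vector {4..7} containing the mismatch is then split off and skipped,
   and discovery is retried on the remaining {8..11}.  For loop SLP the
   split lands exactly at the first mismatch instead, with both parts
   forced to strided accesses since loop vectorization cannot handle
   gaps in stores.  */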

/* Fill in backedge SLP children in the SLP graph.  */

static void
vect_analyze_slp_backedges (vec_info *vinfo, slp_tree node,
                            scalar_stmts_to_slp_tree_map_t *bst_map,
                            hash_set<slp_tree> &visited)
{
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def
      || visited.add (node))
    return;

  slp_tree child;
  unsigned i;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (child)
      vect_analyze_slp_backedges (vinfo, child, bst_map, visited);

  /* Inductions are not vectorized by vectorizing their defining cycle
     but by materializing the values from SCEV data.  */
  if (STMT_VINFO_DEF_TYPE (SLP_TREE_REPRESENTATIVE (node))
      == vect_induction_def)
    return;

  if (gphi *phi = dyn_cast <gphi *> (SLP_TREE_REPRESENTATIVE (node)->stmt))
    for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
      {
        auto_vec<stmt_vec_info, 64> stmts;
        unsigned j;
        stmt_vec_info phi_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, phi_info)
          {
            tree def = gimple_phi_arg_def (as_a <gphi *>(phi_info->stmt), i);
            stmt_vec_info def_info = vinfo->lookup_def (def);
            if (!def_info)
              break;
            stmts.safe_push (vect_stmt_to_vectorize (def_info));
          }
        if (j != SLP_TREE_LANES (node))
          continue;
        slp_tree *edge_def = bst_map->get (stmts);
        if (edge_def)
          {
            /* ???  We're currently not recording non-backedge children
               of PHIs like external reduction initial values.  Avoid
               NULL entries in SLP_TREE_CHILDREN for those and thus
               for now simply only record backedge defs at a
               SLP_TREE_CHILDREN index possibly not matching that of
               the corresponding PHI argument index.  */
            SLP_TREE_CHILDREN (node).quick_push (*edge_def);
            (*edge_def)->refcnt++;
          }
      }
}
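
/* Illustratively, for a vectorized reduction cycle with PHIs like
     x_1 = PHI <x_0 (preheader), x_2 (latch)>
   the per-lane latch defs x_2 form a scalar stmt set that discovery
   already built a node for; looking that set up in BST_MAP yields the
   backedge child pushed here, closing the cycle in the SLP graph.  */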

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

opt_result
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  stmt_vec_info first_element;

  DUMP_VECT_SCOPE ("vect_analyze_slp");

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
                                             max_tree_size))
              {
                /* Dissolve reduction chain group.  */
                stmt_vec_info vinfo = first_element;
                stmt_vec_info last = NULL;
                while (vinfo)
                  {
                    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
                    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
                    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
                    last = vinfo;
                    vinfo = next;
                  }
                STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
                /* It can be still vectorized as part of an SLP reduction.  */
                loop_vinfo->reductions.safe_push (last);
              }
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
                                   max_tree_size);
    }

  /* Fill in backedges.  */
  slp_instance instance;
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
    vect_analyze_slp_backedges (vinfo, SLP_INSTANCE_TREE (instance),
                                bst_map, visited);

  /* The map keeps a reference on SLP nodes built, release that.  */
  for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
       it != bst_map->end (); ++it)
    if ((*it).second)
      vect_free_slp_tree ((*it).second);
  delete bst_map;

  return opt_result::success ();
}

/* Fill the vertices and leafs vector with all nodes in the SLP graph.  */

static void
vect_slp_build_vertices (hash_set<slp_tree> &visited, slp_tree node,
                         vec<slp_tree> &vertices, vec<int> &leafs)
{
  unsigned i;
  slp_tree child;

  if (visited.add (node))
    return;

  node->vertex = vertices.length ();
  vertices.safe_push (node);
  if (SLP_TREE_CHILDREN (node).is_empty ())
    leafs.safe_push (node->vertex);
  else
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      vect_slp_build_vertices (visited, child, vertices, leafs);
}

/* Fill the vertices and leafs vector with all nodes in the SLP graph.  */

static void
vect_slp_build_vertices (vec_info *info, vec<slp_tree> &vertices,
                         vec<int> &leafs)
{
  hash_set<slp_tree> visited;
  unsigned i;
  slp_instance instance;
  FOR_EACH_VEC_ELT (info->slp_instances, i, instance)
    vect_slp_build_vertices (visited, SLP_INSTANCE_TREE (instance), vertices,
                             leafs);
}

/* Apply (reverse) bijective permute PERM to VEC.  */

template <class T>
static void
vect_slp_permute (vec<unsigned> perm,
                  vec<T> &vec, bool reverse)
{
  auto_vec<T, 64> saved;
  saved.create (vec.length ());
  for (unsigned i = 0; i < vec.length (); ++i)
    saved.quick_push (vec[i]);

  if (reverse)
    {
      for (unsigned i = 0; i < vec.length (); ++i)
        vec[perm[i]] = saved[i];
      for (unsigned i = 0; i < vec.length (); ++i)
        gcc_assert (vec[perm[i]] == saved[i]);
    }
  else
    {
      for (unsigned i = 0; i < vec.length (); ++i)
        vec[i] = saved[perm[i]];
      for (unsigned i = 0; i < vec.length (); ++i)
        gcc_assert (vec[i] == saved[perm[i]]);
    }
}

/* Return whether permutations PERM_A and PERM_B as recorded in the
   PERMS vector are equal.  */

static bool
vect_slp_perms_eq (const vec<vec<unsigned> > &perms,
                   int perm_a, int perm_b)
{
  return (perm_a == perm_b
          || (perms[perm_a].length () == perms[perm_b].length ()
              && memcmp (&perms[perm_a][0], &perms[perm_b][0],
                         sizeof (unsigned) * perms[perm_a].length ()) == 0));
}

/* Optimize the SLP graph of VINFO.  */

void
vect_optimize_slp (vec_info *vinfo)
{
  if (vinfo->slp_instances.is_empty ())
    return;

  slp_tree node;
  unsigned i;
  auto_vec<slp_tree> vertices;
  auto_vec<int> leafs;
  vect_slp_build_vertices (vinfo, vertices, leafs);

  struct graph *slpg = new_graph (vertices.length ());
  FOR_EACH_VEC_ELT (vertices, i, node)
    {
      unsigned j;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
        add_edge (slpg, i, child->vertex);
    }

  /* Compute (reverse) postorder on the inverted graph.  */
  auto_vec<int> ipo;
  graphds_dfs (slpg, &leafs[0], leafs.length (), &ipo, false, NULL, NULL);

  auto_sbitmap n_visited (vertices.length ());
  auto_sbitmap n_materialize (vertices.length ());
  auto_vec<int> n_perm (vertices.length ());
  auto_vec<vec<unsigned> > perms;

  bitmap_clear (n_visited);
  bitmap_clear (n_materialize);
  n_perm.quick_grow_cleared (vertices.length ());
  perms.safe_push (vNULL); /* zero is no permute */

  /* Produce initial permutations.  */
  for (i = 0; i < leafs.length (); ++i)
    {
      int idx = leafs[i];
      slp_tree node = vertices[idx];

      /* Handle externals and constants optimistically throughout the
         iteration.  */
      if (SLP_TREE_DEF_TYPE (node) == vect_external_def
          || SLP_TREE_DEF_TYPE (node) == vect_constant_def)
        continue;

      /* Loads are the only thing generating permutes and leafs do not
         change across iterations.  */
      bitmap_set_bit (n_visited, idx);
      if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
        continue;

      /* If splitting out a SLP_TREE_LANE_PERMUTATION can make the
         node unpermuted, record this permute.  */
      stmt_vec_info dr_stmt = SLP_TREE_REPRESENTATIVE (node);
      if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
        continue;
      dr_stmt = DR_GROUP_FIRST_ELEMENT (dr_stmt);
      unsigned imin = DR_GROUP_SIZE (dr_stmt) + 1, imax = 0;
      bool any_permute = false;
      for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
        {
          unsigned idx = SLP_TREE_LOAD_PERMUTATION (node)[j];
          imin = MIN (imin, idx);
          imax = MAX (imax, idx);
          if (idx - SLP_TREE_LOAD_PERMUTATION (node)[0] != j)
            any_permute = true;
        }
      /* If there's no permute no need to split one out.  */
      if (!any_permute)
        continue;
      /* If the span doesn't match we'd disrupt VF computation, avoid
         that for now.  */
      if (imax - imin + 1 != SLP_TREE_LANES (node))
        continue;

      /* For now only handle true permutes, like
         vect_attempt_slp_rearrange_stmts did.  This allows us to be lazy
         when permuting constants and invariants keeping the permute
         bijective.  */
      auto_sbitmap load_index (SLP_TREE_LANES (node));
      bitmap_clear (load_index);
      for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
        bitmap_set_bit (load_index, SLP_TREE_LOAD_PERMUTATION (node)[j] - imin);
      unsigned j;
      for (j = 0; j < SLP_TREE_LANES (node); ++j)
        if (!bitmap_bit_p (load_index, j))
          break;
      if (j != SLP_TREE_LANES (node))
        continue;

      vec<unsigned> perm = vNULL;
      perm.safe_grow (SLP_TREE_LANES (node), true);
      for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
        perm[j] = SLP_TREE_LOAD_PERMUTATION (node)[j] - imin;
      perms.safe_push (perm);
      n_perm[idx] = perms.length () - 1;
    }

  /* Propagate permutes along the graph and compute materialization points.  */
  bool changed;
  unsigned iteration = 0;
  do
    {
      changed = false;
      ++iteration;

      for (i = vertices.length (); i > 0 ; --i)
        {
          int idx = ipo[i-1];
          slp_tree node = vertices[idx];
          /* For leafs there's nothing to do - we've seeded permutes
             on those above.  */
          if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
            continue;

          bitmap_set_bit (n_visited, idx);

          /* We cannot move a permute across a store.  */
          if (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))
              && DR_IS_WRITE
                   (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))))
            continue;

          int perm = -1;
          for (graph_edge *succ = slpg->vertices[idx].succ;
               succ; succ = succ->succ_next)
            {
              int succ_idx = succ->dest;
              /* Handle unvisited nodes optimistically.  */
              /* ???  But for constants once we want to handle non-bijective
                 permutes we have to verify the permute, when unifying lanes,
                 will not unify different constants.  For example see
                 gcc.dg/vect/bb-slp-14.c for a case that would break.  */
              if (!bitmap_bit_p (n_visited, succ_idx))
                continue;
              int succ_perm = n_perm[succ_idx];
              /* Once we materialize succs permutation its output lanes
                 appear unpermuted to us.  */
              if (bitmap_bit_p (n_materialize, succ_idx))
                succ_perm = 0;
              if (perm == -1)
                perm = succ_perm;
              else if (succ_perm == 0)
                {
                  perm = 0;
                  break;
                }
              else if (!vect_slp_perms_eq (perms, perm, succ_perm))
                {
                  perm = 0;
                  break;
                }
            }

          if (perm == -1)
            /* Pick up pre-computed leaf values.  */
            perm = n_perm[idx];
          else if (!vect_slp_perms_eq (perms, perm, n_perm[idx]))
            {
              if (iteration > 1)
                /* Make sure we eventually converge.  */
                gcc_checking_assert (perm == 0);
              n_perm[idx] = perm;
              if (perm == 0)
                bitmap_clear_bit (n_materialize, idx);
              changed = true;
            }

          if (perm == 0)
            continue;

          /* Elide pruning at materialization points in the first
             iteration so every node was visited once at least.  */
          if (iteration == 1)
            continue;

          /* Decide on permute materialization.  Look whether there's
             a use (pred) edge that is permuted differently than us.
             In that case mark ourselves so the permutation is applied.  */
          bool all_preds_permuted = slpg->vertices[idx].pred != NULL;
          for (graph_edge *pred = slpg->vertices[idx].pred;
               pred; pred = pred->pred_next)
            {
              gcc_checking_assert (bitmap_bit_p (n_visited, pred->src));
              int pred_perm = n_perm[pred->src];
              if (!vect_slp_perms_eq (perms, perm, pred_perm))
                {
                  all_preds_permuted = false;
                  break;
                }
            }
          if (!all_preds_permuted)
            {
              if (!bitmap_bit_p (n_materialize, idx))
                changed = true;
              bitmap_set_bit (n_materialize, idx);
            }
        }
    }
  while (changed || iteration == 1);

  /* Materialize.  */
  for (i = 0; i < vertices.length (); ++i)
    {
      int perm = n_perm[i];
      if (perm <= 0)
        continue;

      slp_tree node = vertices[i];

      /* First permute invariant/external original successors.  */
      unsigned j;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
        {
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            continue;

          /* If the vector is uniform there's nothing to do.  */
          if (vect_slp_tree_uniform_p (child))
            continue;

          /* We can end up sharing some externals via two_operator
             handling.  Be prepared to unshare those.  */
          if (child->refcnt != 1)
            {
              gcc_assert (slpg->vertices[child->vertex].pred->pred_next);
              SLP_TREE_CHILDREN (node)[j] = child
                = vect_create_new_slp_node
                    (SLP_TREE_SCALAR_OPS (child).copy ());
            }
          vect_slp_permute (perms[perm],
                            SLP_TREE_SCALAR_OPS (child), true);
        }

      if (bitmap_bit_p (n_materialize, i))
        {
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            /* For loads simply drop the permutation, the load permutation
               already performs the desired permutation.  */
            ;
          else
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "inserting permute node in place of %p\n",
                                 node);

              /* Make a copy of NODE and in-place change it to a
                 VEC_PERM node to permute the lanes of the copy.  */
              slp_tree copy = new _slp_tree;
              SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node);
              SLP_TREE_CHILDREN (node) = vNULL;
              SLP_TREE_SCALAR_STMTS (copy)
                = SLP_TREE_SCALAR_STMTS (node).copy ();
              vect_slp_permute (perms[perm],
                                SLP_TREE_SCALAR_STMTS (copy), true);
              gcc_assert (!SLP_TREE_SCALAR_OPS (node).exists ());
              SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node);
              gcc_assert (!SLP_TREE_LOAD_PERMUTATION (node).exists ());
              SLP_TREE_LANE_PERMUTATION (copy)
                = SLP_TREE_LANE_PERMUTATION (node);
              SLP_TREE_LANE_PERMUTATION (node) = vNULL;
              SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node);
              copy->refcnt = 1;
              copy->max_nunits = node->max_nunits;
              SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node);
              SLP_TREE_LANES (copy) = SLP_TREE_LANES (node);
              SLP_TREE_CODE (copy) = SLP_TREE_CODE (node);

              /* Now turn NODE into a VEC_PERM.  */
              SLP_TREE_CHILDREN (node).safe_push (copy);
              SLP_TREE_LANE_PERMUTATION (node).create (SLP_TREE_LANES (node));
              for (unsigned j = 0; j < SLP_TREE_LANES (node); ++j)
                SLP_TREE_LANE_PERMUTATION (node)
                  .quick_push (std::make_pair (0, perms[perm][j]));
              SLP_TREE_CODE (node) = VEC_PERM_EXPR;
            }
        }
      else
        {
          /* Apply the reverse permutation to our stmts.  */
          vect_slp_permute (perms[perm],
                            SLP_TREE_SCALAR_STMTS (node), true);
          /* And to the load permutation, which we can simply
             make regular by design.  */
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            {
              /* ???  When we handle non-bijective permutes the idea
                 is that we can force the load-permutation to be
                 { min, min + 1, min + 2, ... max }.  But then the
                 scalar defs might no longer match the lane content
                 which means wrong-code with live lane vectorization.
                 So we possibly have to have NULL entries for those.  */
              vect_slp_permute (perms[perm],
                                SLP_TREE_LOAD_PERMUTATION (node), true);
            }
        }
    }

  /* Free the perms vector used for propagation.  */
  while (!perms.is_empty ())
    perms.pop ().release ();
  free_graph (slpg);

  /* Now elide load permutations that are not necessary.  */
  for (i = 0; i < leafs.length (); ++i)
    {
      node = vertices[leafs[i]];
      if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
        continue;

      /* In basic block vectorization we allow any subchain of an interleaving
         chain.
         FORNOW: not in loop SLP because of realignment complications.  */
      if (is_a <bb_vec_info> (vinfo))
        {
          bool subchain_p = true;
          stmt_vec_info next_load_info = NULL;
          stmt_vec_info load_info;
          unsigned j;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              if (j != 0
                  && (next_load_info != load_info
                      || DR_GROUP_GAP (load_info) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
        }
      else
        {
          stmt_vec_info load_info;
          bool this_load_permuted = false;
          unsigned j;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
              {
                this_load_permuted = true;
                break;
              }
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
          if (!this_load_permuted
              /* The load requires permutation when unrolling exposes
                 a gap either because the group is larger than the SLP
                 group-size or because there is a gap between the groups.  */
              && (known_eq (LOOP_VINFO_VECT_FACTOR
                              (as_a <loop_vec_info> (vinfo)), 1U)
                  || ((SLP_TREE_LANES (node) == DR_GROUP_SIZE (first_stmt_info))
                      && DR_GROUP_GAP (first_stmt_info) == 0)))
            SLP_TREE_LOAD_PERMUTATION (node).release ();
        }
    }
}
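
/* A rough illustration of the propagation above (not from the sources):
   two loads feeding an add, each with load permutation { 1, 0 }, seed
   the same non-zero permute index.  The add sees agreeing successor
   permutes and inherits the permute; the store using the add must stay
   unpermuted, so the add becomes a materialization point.  The lanes of
   that subgraph are then swapped once during materialization and the
   now-regular load permutations { 0, 1 } are elided in the final
   pass.  */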

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  DUMP_VECT_SCOPE ("vect_make_slp_decision");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form:

           GET_MODE_SIZE (vinfo->vector_mode) * X

         for some rational X, so they must have a common multiple.  */
      unrolling_factor
        = force_common_multiple (unrolling_factor,
                                 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "Decided to SLP %d instances. Unrolling factor ",
                       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}
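
/* For example, an instance with unrolling factor 2 combined with one of
   factor 3 forces a loop unrolling factor of 6; since every factor has
   the form GET_MODE_SIZE (vinfo->vector_mode) * X for some rational X,
   the common multiple always exists.  */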

/* Private data for vect_detect_hybrid_slp.  */
struct vdhs_data
{
  loop_vec_info loop_vinfo;
  vec<stmt_vec_info> *worklist;
};

/* Walker for walk_gimple_op.  */

static tree
vect_detect_hybrid_slp (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *)data;
  vdhs_data *dat = (vdhs_data *)wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
  if (!def_stmt_info)
    return NULL_TREE;
  def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
  if (PURE_SLP_STMT (def_stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
                         def_stmt_info->stmt);
      STMT_SLP_TYPE (def_stmt_info) = hybrid;
      dat->worklist->safe_push (def_stmt_info);
    }

  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");

  /* All stmts participating in SLP are marked pure_slp, all other
     stmts are loop_vect.
     First collect all loop_vect stmts into a worklist.  */
  auto_vec<stmt_vec_info> worklist;
  for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
          if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
            worklist.safe_push (stmt_info);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              for (gimple_stmt_iterator gsi2
                     = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
                   !gsi_end_p (gsi2); gsi_next (&gsi2))
                {
                  stmt_vec_info patt_info
                    = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
                  if (!STMT_SLP_TYPE (patt_info)
                      && STMT_VINFO_RELEVANT (patt_info))
                    worklist.safe_push (patt_info);
                }
              stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
            }
          if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
            worklist.safe_push (stmt_info);
        }
    }

  /* Now we have a worklist of non-SLP stmts, follow use->def chains and
     mark any SLP vectorized stmt as hybrid.
     ???  We're visiting def stmts N times (once for each non-SLP and
     once for each hybrid-SLP use).  */
  walk_stmt_info wi;
  vdhs_data dat;
  dat.worklist = &worklist;
  dat.loop_vinfo = loop_vinfo;
  memset (&wi, 0, sizeof (wi));
  wi.info = (void *)&dat;
  while (!worklist.is_empty ())
    {
      stmt_vec_info stmt_info = worklist.pop ();
      /* Since SSA operands are not set up for pattern stmts we need
         to use walk_gimple_op.  */
      wi.is_lhs = 0;
      walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
    }
}
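
/* An illustrative scenario: when an SLP instance computes
     t_1 = a[i] + b[i]
   as pure SLP while a non-SLP, loop-vectorized stmt such as
   s_2 = t_1 * x_3 uses t_1, the operand walk from s_2 reaches t_1's
   def and marks it hybrid, so it is additionally vectorized with the
   loop-based scheme its user requires.  */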

/* Initialize a bb_vec_info struct for the statements in BBS basic blocks.  */

_bb_vec_info::_bb_vec_info (vec<basic_block> _bbs, vec_info_shared *shared)
  : vec_info (vec_info::bb, init_cost (NULL), shared), bbs (_bbs)
{
  for (unsigned i = 0; i < bbs.length (); ++i)
    {
      if (i != 0)
        for (gphi_iterator si = gsi_start_phis (bbs[i]); !gsi_end_p (si);
             gsi_next (&si))
          {
            gphi *phi = si.phi ();
            gimple_set_uid (phi, 0);
            add_stmt (phi);
          }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
           !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, 0);
          if (is_gimple_debug (stmt))
            continue;
          add_stmt (stmt);
        }
    }
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  /* Reset region marker.  */
  for (unsigned i = 0; i < bbs.length (); ++i)
    {
      if (i != 0)
        for (gphi_iterator si = gsi_start_phis (bbs[i]); !gsi_end_p (si);
             gsi_next (&si))
          {
            gphi *phi = si.phi ();
            gimple_set_uid (phi, -1);
          }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
           !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gimple_set_uid (stmt, -1);
        }
    }
}

/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
   given that child nodes have already been processed, and that
   their def types currently match their SLP node's def type.  */

static bool
vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
                                    slp_instance node_instance,
                                    stmt_vector_for_cost *cost_vec)
{
  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
        if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
          {
            SLP_TREE_NUMBER_OF_VEC_STMTS (node)
              = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
            break;
          }
    }
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
        vf = loop_vinfo->vectorization_factor;
      else
        vf = 1;
      unsigned int group_size = SLP_TREE_LANES (node);
      tree vectype = SLP_TREE_VECTYPE (node);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = vect_get_num_vectors (vf * group_size, vectype);
    }

  /* Handle purely internal nodes.  */
  if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
    return vectorizable_slp_permutation (vinfo, NULL, node, cost_vec);

  if (is_a <bb_vec_info> (vinfo)
      && !vect_update_shared_vectype (stmt_info, SLP_TREE_VECTYPE (node)))
    return false;

  bool dummy;
  return vect_analyze_stmt (vinfo, stmt_info, &dummy,
                            node, node_instance, cost_vec);
}

/* Try to build NODE from scalars, returning true on success.
   NODE_INSTANCE is the SLP instance that contains NODE.  */

static bool
vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
                              slp_instance node_instance)
{
  stmt_vec_info stmt_info;
  unsigned int i;

  if (!is_a <bb_vec_info> (vinfo)
      || node == SLP_INSTANCE_TREE (node_instance)
      || !SLP_TREE_SCALAR_STMTS (node).exists ()
      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Building vector operands from scalars instead\n");

  /* Don't remove and free the child nodes here, since they could be
     referenced by other structures.  The analysis and scheduling phases
     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
  unsigned int group_size = SLP_TREE_LANES (node);
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size, true);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
    }
  return true;
}
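
/* Illustrative case: if a two-lane node for
     x_1 = a_2 / b_3;  y_4 = c_5 / d_6;
   fails analysis because no vector division is available, the node is
   demoted to an external def of the scalar lhs values { x_1, y_4 },
   which the parent can then assemble into a vector, pruning the tree
   instead of felling the whole instance.  */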

/* Compute the prologue cost for invariant or constant operands represented
   by NODE.  */

static void
vect_prologue_cost_for_slp (slp_tree node,
                            stmt_vector_for_cost *cost_vec)
{
  /* There's a special case of an existing vector, that costs nothing.  */
  if (SLP_TREE_SCALAR_OPS (node).length () == 0
      && !SLP_TREE_VEC_DEFS (node).is_empty ())
    return;
  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = SLP_TREE_VECTYPE (node);
  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = SLP_TREE_SCALAR_OPS (node)[si];
      /* ???  We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          record_stmt_cost (cost_vec, 1,
                            SLP_TREE_DEF_TYPE (node) == vect_external_def
                            ? (elt ? scalar_to_vec : vec_construct)
                            : vector_load,
                            NULL, vectype, 0, vect_prologue);
          nelt = 0;
        }
    }
}
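
/* Costing sketch for the above: four identical invariant operands with
   a four-lane vector type record one scalar_to_vec (a splat), four
   distinct external operands record one vec_construct, and a constant
   node is accounted as a vector_load from the constant pool for each
   distinct vector needed.  */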

/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
                                  slp_instance node_instance,
                                  hash_set<slp_tree> &visited,
                                  hash_set<slp_tree> &lvisited,
                                  stmt_vector_for_cost *cost_vec)
{
  int i, j;
  slp_tree child;

  /* Assume we can code-generate all invariants.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* If we already analyzed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited.contains (node)
      || lvisited.add (node))
    return true;

  bool res = true;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      res = vect_slp_analyze_node_operations (vinfo, child, node_instance,
                                              visited, lvisited, cost_vec);
      if (!res)
        break;
    }

  if (res)
    res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
                                              cost_vec);
  if (!res)
    lvisited.remove (node);

  /* When the node can be vectorized cost invariant nodes it references.
     This is not done in DFS order to allow the referring node
     vectorizable_* calls to nail down the invariant nodes vector type
     and possibly unshare it if it needs a different vector type than
     the first use.  */
  if (res)
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
      if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
           || SLP_TREE_DEF_TYPE (child) == vect_external_def)
          /* Perform usual caching, note code-generation still
             code-gens these nodes multiple times but we expect
             to CSE them later.  */
          && !visited.contains (child)
          && !lvisited.add (child))
        {
          /* ???  After auditing more code paths make a "default"
             and push the vector type from NODE to all children
             if it is not already set.  */
          /* Compute the number of vectors to be generated.  */
          tree vector_type = SLP_TREE_VECTYPE (child);
          if (!vector_type)
            {
              /* For shifts with a scalar argument we don't need
                 to cost or code-generate anything.
                 ???  Represent this more explicitly.  */
              gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
                           == shift_vec_info_type)
                          && j == 1);
              continue;
            }
          unsigned group_size = SLP_TREE_LANES (child);
          poly_uint64 vf = 1;
          if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
            vf = loop_vinfo->vectorization_factor;
          SLP_TREE_NUMBER_OF_VEC_STMTS (child)
            = vect_get_num_vectors (vf * group_size, vector_type);
          /* And cost them.  */
          vect_prologue_cost_for_slp (child, cost_vec);
        }

  /* If this node or any of its children can't be vectorized, try pruning
     the tree here rather than felling the whole thing.  */
  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
    {
      /* We'll need to revisit this for invariant costing and number
         of vectorized stmt setting.  */
      res = true;
    }

  return res;
}

/* Mark lanes of NODE that are live outside of the basic-block vectorized
   region and that can be vectorized using vectorizable_live_operation
   with STMT_VINFO_LIVE_P.  Live operations that are not handled will
   cause the scalar code computing them to be retained.  */

static void
vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
                             slp_instance instance,
                             stmt_vector_for_cost *cost_vec,
                             hash_set<stmt_vec_info> &svisited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  stmt_vec_info last_stmt = vect_find_last_scalar_stmt_in_slp (node);
  bool all_visited = true;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      if (svisited.contains (stmt_info))
        continue;
      all_visited = false;
      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
      if (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)
          && STMT_VINFO_RELATED_STMT (orig_stmt_info) != stmt_info)
        /* Only the pattern root stmt computes the original scalar value.  */
        continue;
      bool mark_visited = true;
      gimple *orig_stmt = orig_stmt_info->stmt;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt))
              {
                use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
                if (!use_stmt_info
                    || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
                  {
                    STMT_VINFO_LIVE_P (stmt_info) = true;
                    if (vectorizable_live_operation (bb_vinfo, stmt_info,
                                                     NULL, node, instance, i,
                                                     false, cost_vec))
                      /* ???  So we know we can vectorize the live stmt
                         from one SLP node.  If we cannot do so from all
                         or none consistently we'd have to record which
                         SLP node (and lane) we want to use for the live
                         operation.  So make sure we can code-generate
                         from all nodes.  */
                      mark_visited = false;
                    else
                      STMT_VINFO_LIVE_P (stmt_info) = false;
                    BREAK_FROM_IMM_USE_STMT (use_iter);
                  }
              }
          /* We have to verify whether we can insert the lane extract
             before all uses.  The following is a conservative approximation.
             We cannot put this into vectorizable_live_operation because
             iterating over all use stmts from inside a FOR_EACH_IMM_USE_STMT
             doesn't work.
             Note that while the fact that we emit code for loads at the
             first load should make this a non-problem, leafs we construct
             from scalars are vectorized after the last scalar def.
             ???  If we'd actually compute the insert location during
             analysis we could use sth less conservative than the last
             scalar stmt in the node for the dominance check.  */
          /* ???  What remains is "live" uses in vector CTORs in the same
             SLP graph which is where those uses can end up code-generated
             right after their definition instead of close to their original
             use.  But that would restrict us to code-generate lane-extracts
             from the latest stmt in a node.  So we compensate for this
             during code-generation, simply not replacing uses for those
             hopefully rare cases.  */
          if (STMT_VINFO_LIVE_P (stmt_info))
            FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
              if (!is_gimple_debug (use_stmt)
                  && (!(use_stmt_info = bb_vinfo->lookup_stmt (use_stmt))
                      || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
                  && !vect_stmt_dominates_stmt_p (last_stmt->stmt, use_stmt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Cannot determine insertion place for "
                                     "lane extract\n");
                  STMT_VINFO_LIVE_P (stmt_info) = false;
                  mark_visited = true;
                }
        }
      if (mark_visited)
        svisited.add (stmt_info);
    }
  if (all_visited)
    return;

  slp_tree child;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
                                   cost_vec, svisited);
}

/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  unsigned int i;

  DUMP_VECT_SCOPE ("vect_slp_analyze_operations");

  hash_set<slp_tree> visited;
  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      hash_set<slp_tree> lvisited;
      stmt_vector_for_cost cost_vec;
      cost_vec.create (2);
      if (is_a <bb_vec_info> (vinfo))
        vect_location = instance->location ();
      if (!vect_slp_analyze_node_operations (vinfo,
                                             SLP_INSTANCE_TREE (instance),
                                             instance, visited, lvisited,
                                             &cost_vec)
          /* Instances with a root stmt require vectorized defs for the
             SLP tree root.  */
          || (SLP_INSTANCE_ROOT_STMT (instance)
              && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
                  != vect_internal_def)))
        {
          slp_tree node = SLP_INSTANCE_TREE (instance);
          stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "removing SLP instance operations starting from: %G",
                             stmt_info->stmt);
          vect_free_slp_instance (instance);
          vinfo->slp_instances.ordered_remove (i);
          cost_vec.release ();
        }
      else
        {
          for (hash_set<slp_tree>::iterator x = lvisited.begin();
               x != lvisited.end(); ++x)
            visited.add (*x);
          i++;

          /* For BB vectorization remember the SLP graph entry
             cost for later.  */
          if (is_a <bb_vec_info> (vinfo))
            instance->cost_vec = cost_vec;
          else
            {
              add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
              cost_vec.release ();
            }
        }
    }

  /* Compute vectorizable live stmts.  */
  if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
    {
      hash_set<stmt_vec_info> svisited;
      for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
        {
          vect_location = instance->location ();
          vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
                                       instance, &instance->cost_vec, svisited);
        }
    }

  return !vinfo->slp_instances.is_empty ();
}

/* Get the SLP instance leader from INSTANCE_LEADER thereby transitively
   closing the eventual chain.  */

static slp_instance
get_ultimate_leader (slp_instance instance,
                     hash_map<slp_instance, slp_instance> &instance_leader)
{
  auto_vec<slp_instance *, 8> chain;
  slp_instance *tem;
  while (*(tem = instance_leader.get (instance)) != instance)
    {
      chain.safe_push (tem);
      instance = *tem;
    }
  while (!chain.is_empty ())
    *chain.pop () = instance;
  return instance;
}
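
/* For example, with recorded leader entries A -> B, B -> C and C -> C
   the ultimate leader of A is C; the second loop then rewrites the
   entries for A and B to point straight at C, path-compressing the
   chain for later queries.  */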

/* Worker of vect_bb_partition_graph, recurse on NODE.  */

static void
vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
                           slp_instance instance, slp_tree node,
                           hash_map<stmt_vec_info, slp_instance> &stmt_to_instance,
                           hash_map<slp_instance, slp_instance> &instance_leader,
                           hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      bool existed_p;
      slp_instance &stmt_instance
        = stmt_to_instance.get_or_insert (stmt_info, &existed_p);
      if (!existed_p)
        ;
      else if (stmt_instance != instance)
        {
          /* If we're running into a previously marked stmt make us the
             leader of the current ultimate leader.  This keeps the
             leader chain acyclic and works even when the current instance
             connects two previously independent graph parts.  */
          slp_instance stmt_leader
            = get_ultimate_leader (stmt_instance, instance_leader);
          if (stmt_leader != instance)
            instance_leader.put (stmt_leader, instance);
        }
      stmt_instance = instance;
    }

  if (visited.add (node))
    return;

  slp_tree child;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance,
                                 instance_leader, visited);
}

/* Partition the SLP graph into pieces that can be costed independently.  */

static void
vect_bb_partition_graph (bb_vec_info bb_vinfo)
{
  DUMP_VECT_SCOPE ("vect_bb_partition_graph");

  /* First walk the SLP graph assigning each involved scalar stmt a
     corresponding SLP graph entry and upon visiting a previously
     marked stmt, make the stmts leader the current SLP graph entry.  */
  hash_map<stmt_vec_info, slp_instance> stmt_to_instance;
  hash_map<slp_instance, slp_instance> instance_leader;
  hash_set<slp_tree> visited;
  slp_instance instance;
  for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
    {
      instance_leader.put (instance, instance);
      vect_bb_partition_graph_r (bb_vinfo,
                                 instance, SLP_INSTANCE_TREE (instance),
                                 stmt_to_instance, instance_leader,
                                 visited);
    }

  /* Then collect entries to each independent subgraph.  */
  for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
    {
      slp_instance leader = get_ultimate_leader (instance, instance_leader);
      leader->subgraph_entries.safe_push (instance);
      if (dump_enabled_p ()
          && leader != instance)
        dump_printf_loc (MSG_NOTE, vect_location,
                         "instance %p is leader of %p\n",
                         leader, instance);
    }
}

/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static void
vect_bb_slp_scalar_cost (vec_info *vinfo,
                         slp_tree node, vec<bool, va_heap> *life,
                         stmt_vector_for_cost *cost_vec,
                         hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      ssa_op_iter op_iter;
      def_operand_p def_p;

      if ((*life)[i])
        continue;

      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
      gimple *orig_stmt = orig_stmt_info->stmt;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
            {
              imm_use_iterator use_iter;
              gimple *use_stmt;
              FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
                if (!is_gimple_debug (use_stmt))
                  {
                    stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
                    if (!use_stmt_info
                        || !PURE_SLP_STMT
                              (vect_stmt_to_vectorize (use_stmt_info)))
                      {
                        (*life)[i] = true;
                        BREAK_FROM_IMM_USE_STMT (use_iter);
                      }
                  }
            }
          if ((*life)[i])
            continue;
        }

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (orig_stmt))
        continue;
      gimple_set_visited (orig_stmt, true);

      vect_cost_for_stmt kind;
      if (STMT_VINFO_DATA_REF (orig_stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
            kind = scalar_load;
          else
            kind = scalar_store;
        }
      else if (vect_nop_conversion_p (orig_stmt_info))
        continue;
      else
        kind = scalar_stmt;
      record_stmt_cost (cost_vec, 1, kind, orig_stmt_info, 0, vect_body);
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
          if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
            {
              subtree_life.safe_grow_cleared (SLP_TREE_LANES (child), true);
              for (unsigned j = 0;
                   j < SLP_TREE_LANE_PERMUTATION (node).length (); ++j)
                {
                  auto perm = SLP_TREE_LANE_PERMUTATION (node)[j];
                  if (perm.first == i)
                    subtree_life[perm.second] = (*life)[j];
                }
            }
          else
            {
              gcc_assert (SLP_TREE_LANES (node) == SLP_TREE_LANES (child));
              subtree_life.safe_splice (*life);
            }
          vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
                                   visited);
          subtree_life.truncate (0);
        }
    }
}

/* Check if vectorization of the basic block is profitable for the
   subgraph denoted by SLP_INSTANCES.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
                                    vec<slp_instance> slp_instances)
{
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  void *vect_target_cost_data = init_cost (NULL);

  /* Calculate scalar cost and sum the cost for the vector stmts
     previously collected.  */
  stmt_vector_for_cost scalar_costs;
  scalar_costs.create (0);
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance)),
                              true);
      vect_bb_slp_scalar_cost (bb_vinfo,
                               SLP_INSTANCE_TREE (instance),
                               &life, &scalar_costs, visited);
      add_stmt_costs (bb_vinfo, vect_target_cost_data, &instance->cost_vec);
      instance->cost_vec.release ();
    }
  /* Unset visited flag.  */
  stmt_info_for_cost *si;
  FOR_EACH_VEC_ELT (scalar_costs, i, si)
    gimple_set_visited (si->stmt_info->stmt, false);

  void *scalar_target_cost_data = init_cost (NULL);
  add_stmt_costs (bb_vinfo, scalar_target_cost_data, &scalar_costs);
  scalar_costs.release ();
  unsigned dummy;
  finish_cost (scalar_target_cost_data, &dummy, &scalar_cost, &dummy);
  destroy_cost_data (scalar_target_cost_data);

  /* Complete the target-specific vector cost calculation.  */
  finish_cost (vect_target_cost_data, &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);
  destroy_cost_data (vect_target_cost_data);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant
     uses are free on the scalar side but cost a load on the vector side
     for example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
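
/* A numeric sketch: with vec_inside_cost == 4, vec_prologue_cost == 1
   and vec_epilogue_cost == 0 the vector total is 5, so a subgraph with
   scalar cost 6 is vectorized while one with scalar cost 4 is rejected;
   a tie at 5 is resolved in favor of the vector side as explained
   above.  */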

/* Find any vectorizable constructors and add them to the grouped_store
   array.  */

static void
vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
{
  for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
         !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gassign *assign = dyn_cast<gassign *> (gsi_stmt (gsi));
        if (!assign || gimple_assign_rhs_code (assign) != CONSTRUCTOR)
          continue;

        tree rhs = gimple_assign_rhs1 (assign);
        if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
            || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
                         CONSTRUCTOR_NELTS (rhs))
            || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
            || uniform_vector_p (rhs))
          continue;

        stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (assign);
        BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
      }
}
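
/* What this matches, illustratively: a CTOR like
     x_5 = {a_1, b_2, c_3, d_4};
   with one scalar element per lane is queued as an SLP root, while
   uniform CTORs such as {a_1, a_1, a_1, a_1} are skipped (a splat is
   cheaper), as are CTORs of vector elements and CTORs with fewer
   elements than lanes.  */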

/* Check if the region described by BB_VINFO can be vectorized, returning
   true if so.  When returning false, set FATAL to true if the same failure
   would prevent vectorization at other vector sizes, false if it is still
   worth trying other sizes.  N_STMTS is the number of statements in the
   region.  */

static bool
vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
                       vec<int> *dataref_groups)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");
      return false;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");
      return false;
    }

  vect_slp_check_for_constructors (bb_vinfo);

  /* If there are no grouped stores and no constructors in the region
     there is no need to continue with pattern recog as vect_analyze_slp
     will fail anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: no grouped stores in "
                         "basic block.\n");
      return false;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Failed to SLP the basic block.\n");
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: failed to find SLP opportunities "
                           "in basic block.\n");
        }
      return false;
    }

  /* Optimize permutations.  */
  vect_optimize_slp (bb_vinfo);

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      vect_location = instance->location ();
      if (! vect_slp_analyze_instance_alignment (bb_vinfo, instance)
          || ! vect_slp_analyze_instance_dependence (bb_vinfo, instance))
        {
          slp_tree node = SLP_INSTANCE_TREE (instance);
          stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "removing SLP instance operations starting from: %G",
                             stmt_info->stmt);
          vect_free_slp_instance (instance);
          BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
          continue;
        }

      /* Mark all the statements that we want to vectorize as pure SLP and
         relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
      if (SLP_INSTANCE_ROOT_STMT (instance))
        STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    return false;

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");
      return false;
    }

  vect_bb_partition_graph (bb_vinfo);

  return true;
}
/* Subroutine of vect_slp_bb.  Try to vectorize the statements for all
   basic blocks in BBS, returning true on success.
   The region has N_STMTS statements and has the datarefs given by DATAREFS.  */

static bool
vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
		 vec<int> *dataref_groups, unsigned int n_stmts)
{
  bb_vec_info bb_vinfo;
  auto_vector_modes vector_modes;

  /* Autodetect first vector size we try.  */
  machine_mode next_vector_mode = VOIDmode;
  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
  unsigned int mode_i = 0;

  vec_info_shared shared;

  machine_mode autodetected_vector_mode = VOIDmode;
  while (1)
    {
      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = new _bb_vec_info (bbs, &shared);

      bool first_time_p = shared.datarefs.is_empty ();
      BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
      if (first_time_p)
	bb_vinfo->shared->save_datarefs ();
      else
	bb_vinfo->shared->check_datarefs ();
      bb_vinfo->vector_mode = next_vector_mode;

      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "***** Analysis succeeded with vector mode"
			       " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
	      dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
	    }

	  bb_vinfo->shared->check_datarefs ();

	  unsigned i;
	  slp_instance instance;
	  FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
	    {
	      if (instance->subgraph_entries.is_empty ())
		continue;

	      vect_location = instance->location ();
	      if (!unlimited_cost_model (NULL)
		  && !vect_bb_vectorization_profitable_p
			(bb_vinfo, instance->subgraph_entries))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "not vectorized: vectorization is not "
				     "profitable.\n");
		  continue;
		}

	      if (!vectorized && dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Basic block will be vectorized "
				 "using SLP\n");
	      vectorized = true;

	      vect_schedule_slp (bb_vinfo, instance->subgraph_entries);

	      unsigned HOST_WIDE_INT bytes;
	      if (dump_enabled_p ())
		{
		  if (GET_MODE_SIZE
			(bb_vinfo->vector_mode).is_constant (&bytes))
		    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				     "basic block part vectorized using %wu "
				     "byte vectors\n", bytes);
		  else
		    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				     "basic block part vectorized using "
				     "variable length vectors\n");
		}
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Analysis failed with vector mode %s\n",
			     GET_MODE_NAME (bb_vinfo->vector_mode));
	}

      if (mode_i == 0)
	autodetected_vector_mode = bb_vinfo->vector_mode;

      if (!fatal)
	while (mode_i < vector_modes.length ()
	       && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
	  {
	    if (dump_enabled_p ())
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "***** The result for vector mode %s would"
			       " be the same\n",
			       GET_MODE_NAME (vector_modes[mode_i]));
	    mode_i += 1;
	  }

      delete bb_vinfo;

      if (mode_i < vector_modes.length ()
	  && VECTOR_MODE_P (autodetected_vector_mode)
	  && (related_vector_mode (vector_modes[mode_i],
				   GET_MODE_INNER (autodetected_vector_mode))
	      == autodetected_vector_mode)
	  && (related_vector_mode (autodetected_vector_mode,
				   GET_MODE_INNER (vector_modes[mode_i]))
	      == vector_modes[mode_i]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Skipping vector mode %s, which would"
			     " repeat the analysis for %s\n",
			     GET_MODE_NAME (vector_modes[mode_i]),
			     GET_MODE_NAME (autodetected_vector_mode));
	  mode_i += 1;
	}

      if (vectorized
	  || mode_i == vector_modes.length ()
	  || autodetected_vector_mode == VOIDmode
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	return vectorized;

      /* Try the next biggest vector size.  */
      next_vector_mode = vector_modes[mode_i++];
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying analysis with vector mode %s\n",
			 GET_MODE_NAME (next_vector_mode));
    }
}

/* Main entry for the BB vectorizer.  Analyze and transform BBS, returns
   true if anything in the basic-block was vectorized.  */

static bool
vect_slp_bbs (vec<basic_block> bbs)
{
  vec<data_reference_p> datarefs = vNULL;
  auto_vec<int> dataref_groups;
  int insns = 0;
  int current_group = 0;

  for (unsigned i = 0; i < bbs.length (); i++)
    {
      basic_block bb = bbs[i];
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;

	  insns++;

	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
	    vect_location = stmt;

	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
					      &dataref_groups, current_group))
	    ++current_group;
	}
    }

  if (insns > param_slp_max_insns_in_bb)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");
      return false;
    }

  return vect_slp_region (bbs, datarefs, &dataref_groups, insns);
}

/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  auto_vec<basic_block> bbs;
  bbs.safe_push (bb);
  return vect_slp_bbs (bbs);
}

/* Main entry for the BB vectorizer.  Analyze and transform FUN, returns
   true if anything in the function was vectorized.  */

bool
vect_slp_function (function *fun)
{
  bool r = false;
  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun));
  unsigned n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);

  /* For the moment split the function into pieces to avoid making
     the iteration on the vector mode moot.  Split at points we know
     to not handle well which is CFG merges (SLP discovery doesn't
     handle non-loop-header PHIs) and loop exits.  Since pattern
     recog requires reverse iteration to visit uses before defs
     simply chop RPO into pieces.  */
  auto_vec<basic_block> bbs;
  for (unsigned i = 0; i < n; i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i]);
      bool split = false;

      /* Split when a basic block has multiple predecessors or when the
	 edge into it exits a loop (because of implementation issues with
	 respect to placement of CTORs for externals).  */
      edge e;
      if (!single_pred_p (bb)
	  || ((e = single_pred_edge (bb)),
	      loop_exit_edge_p (e->src->loop_father, e)))
	split = true;
      /* Split when a BB is not dominated by the first block.  */
      else if (!bbs.is_empty ()
	       && !dominated_by_p (CDI_DOMINATORS, bb, bbs[0]))
	split = true;

      if (split && !bbs.is_empty ())
	{
	  r |= vect_slp_bbs (bbs);
	  bbs.truncate (0);
	  bbs.quick_push (bb);
	}
      else
	bbs.safe_push (bb);

      /* When we have a stmt ending this block we have to insert on
	 edges when inserting after it.  Avoid this for now.  */
      if (gimple *last = last_stmt (bb))
	if (is_ctrl_altering_stmt (last))
	  {
	    r |= vect_slp_bbs (bbs);
	    bbs.truncate (0);
	  }
    }

  if (!bbs.is_empty ())
    r |= vect_slp_bbs (bbs);

  free (rpo);

  return r;
}

/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */

void
duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
			  vec<tree> elts, unsigned int nresults,
			  vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
				       &nvectors, &new_vector_type,
				       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
	     ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
	partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
			TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
	 correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

	out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
	out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
	{
	  if ((i & 1) != 0
	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
			     2 * in_repeat))
	    {
	      pieces[out_start + i] = pieces[out_start + i - 1];
	      continue;
	    }

	  tree output = make_ssa_name (new_vector_type);
	  tree input1 = pieces[in_start + (i / 2)];
	  tree input2 = pieces[in_start + (i / 2) + hi_start];
	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
					       input1, input2,
					       permutes[i & 1]);
	  gimple_seq_add_stmt (seq, stmt);
	  pieces[out_start + i] = output;
	}
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
					pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}

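/* Editorial illustration (a hypothetical sketch, not part of the pass):
   step (4) above on plain arrays.  One round of lo/hi interleaving merges
   two duplicated pieces; log2(nvectors) rounds turn pieces A A A A and
   B B B B into A B A B A B A B.  */
#if 0
static void
example_interleave_lo_hi (const int *in1, const int *in2,
			  int *lo, int *hi, int n)
{
  for (int i = 0; i < n / 2; i++)
    {
      lo[2 * i] = in1[i];
      lo[2 * i + 1] = in2[i];
      hi[2 * i] = in1[n / 2 + i];
      hi[2 * i + 1] = in2[n / 2 + i];
    }
}
#endif
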
/* For constant and loop invariant defs in OP_NODE this function creates
   vector defs that will be used in the vectorized stmts and stores them
   to SLP_TREE_VEC_DEFS of OP_NODE.  */

static void
vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
{
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = op_node->ops.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  bool constant_p;
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* We always want SLP_TREE_VECTYPE (op_node) here correctly set.  */
  vector_type = SLP_TREE_VECTYPE (op_node);

  unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
  SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
  auto_vec<tree> voprnds (number_of_vectors);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  stmt_vec_info insert_after = NULL;
  for (j = 0; j < number_of_copies; j++)
    {
      tree op;
      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
	{
	  /* Create 'vect_ = {op0,op1,...,opn}'.  */
	  number_of_places_left_in_vector--;
	  tree orig_op = op;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  /* For BB vectorization we have to compute an insert location
	     when a def is inside the analyzed region since we cannot
	     simply insert at the BB start in this case.  */
	  stmt_vec_info opdef;
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && is_a <bb_vec_info> (vinfo)
	      && (opdef = vinfo->lookup_def (orig_op)))
	    {
	      if (!insert_after)
		insert_after = opdef;
	      else
		insert_after = get_later_stmt (insert_after, opdef);
	    }

	  if (number_of_places_left_in_vector == 0)
	    {
	      if (constant_p
		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
		vec_cst = gimple_build_vector (&ctor_seq, &elts);
	      else
		{
		  if (permute_results.is_empty ())
		    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
					      elts, number_of_vectors,
					      permute_results);
		  vec_cst = permute_results[number_of_vectors - j - 1];
		}
	      if (!gimple_seq_empty_p (ctor_seq))
		{
		  if (insert_after)
		    {
		      gimple_stmt_iterator gsi;
		      if (gimple_code (insert_after->stmt) == GIMPLE_PHI)
			{
			  gsi = gsi_after_labels (gimple_bb (insert_after->stmt));
			  gsi_insert_seq_before (&gsi, ctor_seq,
						 GSI_CONTINUE_LINKING);
			}
		      else if (!stmt_ends_bb_p (insert_after->stmt))
			{
			  gsi = gsi_for_stmt (insert_after->stmt);
			  gsi_insert_seq_after (&gsi, ctor_seq,
						GSI_CONTINUE_LINKING);
			}
		      else
			{
			  /* When we want to insert after a def where the
			     defining stmt throws then insert on the fallthru
			     edge.  */
			  edge e = find_fallthru_edge
				     (gimple_bb (insert_after->stmt)->succs);
			  basic_block new_bb
			    = gsi_insert_seq_on_edge_immediate (e, ctor_seq);
			  gcc_assert (!new_bb);
			}
		    }
		  else
		    vinfo->insert_seq_on_entry (NULL, ctor_seq);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (vec_cst);
	      insert_after = NULL;
	      number_of_places_left_in_vector = nunits;
	      constant_p = true;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
	    }
	}
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
    }

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > SLP_TREE_VEC_DEFS (op_node).length ())
    for (i = 0; SLP_TREE_VEC_DEFS (op_node).iterate (i, &vop) && i < vec_num;
	 i++)
      SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
}

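/* Editorial illustration (hypothetical numbers, mirroring the comment
   above): NUMBER_OF_COPIES is NUNITS * NUMBER_OF_VECTORS / GROUP_SIZE.
   With a group of 2 scalars, 4-element vectors and one vector to build
   this yields 2 copies, i.e. {s1, s2, s1, s2}; with a group of 8 and two
   vectors it yields 1 copy, i.e. {s1, ..., s4} {s5, ..., s8}.  */
#if 0
static unsigned
example_number_of_copies (unsigned nunits, unsigned number_of_vectors,
			  unsigned group_size)
{
  return nunits * number_of_vectors / group_size;
}
#endif
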
/* Get the Ith vectorized definition from SLP_NODE.  */

tree
vect_get_slp_vect_def (slp_tree slp_node, unsigned i)
{
  if (SLP_TREE_VEC_STMTS (slp_node).exists ())
    return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]);
  else
    return SLP_TREE_VEC_DEFS (slp_node)[i];
}

/* Get the vectorized definitions of SLP_NODE in *VEC_DEFS.  */

void
vect_get_slp_defs (slp_tree slp_node, vec<tree> *vec_defs)
{
  vec_defs->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
  if (SLP_TREE_DEF_TYPE (slp_node) == vect_internal_def)
    {
      unsigned j;
      gimple *vec_def_stmt;
      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), j, vec_def_stmt)
	vec_defs->quick_push (gimple_get_lhs (vec_def_stmt));
    }
  else
    vec_defs->splice (SLP_TREE_VEC_DEFS (slp_node));
}

/* Get N vectorized definitions for SLP_NODE.  */

void
vect_get_slp_defs (vec_info *,
		   slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
  if (n == -1U)
    n = SLP_TREE_CHILDREN (slp_node).length ();

  for (unsigned i = 0; i < n; ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
      vec<tree> vec_defs = vNULL;
      vect_get_slp_defs (child, &vec_defs);
      vec_oprnds->quick_push (vec_defs);
    }
}

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE.  */

bool
vect_transform_slp_perm_load (vec_info *vinfo,
			      slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      bool analyze_only, unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned int mask_element;
  unsigned int i;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  vec_perm_builder mask;
  unsigned int nelts_to_build;
  unsigned int nvectors_per_build;
  bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
		      && multiple_p (nunits, group_size));
  if (repeating_p)
    {
      /* A single vector contains a whole number of copies of the node, so:
	 (a) all permutes can use the same mask; and
	 (b) the permutes only need a single vector input.  */
      mask.new_vector (nunits, group_size, 3);
      nelts_to_build = mask.encoded_nelts ();
      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    }
  else
    {
      /* We need to construct a separate mask for each vector statement.  */
      unsigned HOST_WIDE_INT const_nunits, const_vf;
      if (!nunits.is_constant (&const_nunits)
	  || !vf.is_constant (&const_vf))
	return false;
      mask.new_vector (const_nunits, const_nunits, 1);
      nelts_to_build = const_vf * group_size;
      nvectors_per_build = 1;
    }

  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;

  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
			+ SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
      if (repeating_p)
	{
	  first_vec_index = 0;
	  mask_element = i;
	}
      else
	{
	  /* Enforced before the loop when !repeating_p.  */
	  unsigned int const_nunits = nunits.to_constant ();
	  vec_index = i / const_nunits;
	  mask_element = i % const_nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    first_vec_index = vec_index;
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      mask_element += const_nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "permutation requires at "
				 "least three vectors %G",
				 stmt_info->stmt);
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * const_nunits);
	}

      if (mask_element != index)
	noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
	{
	  indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
	  if (!can_vec_perm_const_p (mode, indices))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported vect permute { ");
		  for (i = 0; i < count; ++i)
		    {
		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
		    }
		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  ++*n_perms;
	}

      if (index == count)
	{
	  if (!analyze_only)
	    {
	      tree mask_vec = NULL_TREE;

	      if (! noop_p)
		mask_vec = vect_gen_perm_mask_checked (vectype, indices);

	      if (second_vec_index == -1)
		second_vec_index = first_vec_index;

	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
		{
		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index + ri];
		  tree second_vec = dr_chain[second_vec_index + ri];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt
			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
					       first_vec, second_vec,
					       mask_vec);
		      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
						   gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}
	    }

	  index = 0;
	  first_vec_index = -1;
	  second_vec_index = -1;
	  noop_p = true;
	}
    }

  return true;
}

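/* Editorial illustration (a hypothetical sketch of the !repeating_p index
   split above): a flat element index I decomposes into vector index
   I / NUNITS and lane I % NUNITS; the lane is biased by NUNITS when it
   selects the second permute operand.  For the comment's last mask,
   index 9 with NUNITS == 4 lies in vector 2, lane 1, which relative to
   input vectors 1 and 2 becomes mask element 1 + 4 == 5, turning the
   illegal {6,9,9,9} into {2,5,5,5}.  */
#if 0
static unsigned
example_two_operand_mask_element (unsigned i, unsigned nunits,
				  unsigned first_vec_index)
{
  unsigned vec_index = i / nunits;
  unsigned mask_element = i % nunits;
  return vec_index == first_vec_index ? mask_element
				      : mask_element + nunits;
}
#endif
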
/* Vectorize the SLP permutations in NODE as specified
   in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
   child number and lane number.
   Interleaving of two two-lane two-child SLP subtrees (not supported):
     [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
   A blend of two four-lane two-child SLP subtrees:
     [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
   Highpart of a four-lane one-child SLP subtree (not supported):
     [ { 0, 2 }, { 0, 3 } ]
   Where currently only a subset is supported by code generating below.  */

static bool
vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
			      slp_tree node, stmt_vector_for_cost *cost_vec)
{
  tree vectype = SLP_TREE_VECTYPE (node);

  /* ???  We currently only support all same vector input and output types
     while the SLP IL should really do a concat + select and thus accept
     arbitrary mismatches.  */
  slp_tree child;
  unsigned i;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!types_compatible_p (SLP_TREE_VECTYPE (child), vectype))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Unsupported lane permutation\n");
	return false;
      }

  vec<std::pair<unsigned, unsigned> > &perm = SLP_TREE_LANE_PERMUTATION (node);
  gcc_assert (perm.length () == SLP_TREE_LANES (node));
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vectorizing permutation");
      for (unsigned i = 0; i < perm.length (); ++i)
	dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second);
      dump_printf (MSG_NOTE, "\n");
    }

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (!nunits.is_constant ())
    return false;
  unsigned HOST_WIDE_INT vf = 1;
  if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))
    if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&vf))
      return false;
  unsigned olanes = vf * SLP_TREE_LANES (node);
  gcc_assert (multiple_p (olanes, nunits));

  /* Compute the { { SLP operand, vector index}, lane } permutation sequence
     from the { SLP operand, scalar lane } permutation as recorded in the
     SLP node as intermediate step.  This part should already work
     with SLP children with arbitrary number of lanes.  */
  auto_vec<std::pair<std::pair<unsigned, unsigned>, unsigned> > vperm;
  auto_vec<unsigned> active_lane;
  vperm.create (olanes);
  active_lane.safe_grow_cleared (SLP_TREE_CHILDREN (node).length (), true);
  for (unsigned i = 0; i < vf; ++i)
    {
      for (unsigned pi = 0; pi < perm.length (); ++pi)
	{
	  std::pair<unsigned, unsigned> p = perm[pi];
	  tree vtype = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node)[p.first]);
	  unsigned vnunits = TYPE_VECTOR_SUBPARTS (vtype).to_constant ();
	  unsigned vi = (active_lane[p.first] + p.second) / vnunits;
	  unsigned vl = (active_lane[p.first] + p.second) % vnunits;
	  vperm.quick_push (std::make_pair (std::make_pair (p.first, vi), vl));
	}
      /* Advance to the next group.  */
      for (unsigned j = 0; j < SLP_TREE_CHILDREN (node).length (); ++j)
	active_lane[j] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node)[j]);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "as");
      for (unsigned i = 0; i < vperm.length (); ++i)
	{
	  if (i != 0 && multiple_p (i, TYPE_VECTOR_SUBPARTS (vectype)))
	    dump_printf (MSG_NOTE, ",");
	  dump_printf (MSG_NOTE, " vops%u[%u][%u]",
		       vperm[i].first.first, vperm[i].first.second,
		       vperm[i].second);
	}
      dump_printf (MSG_NOTE, "\n");
    }

  /* We can only handle two-vector permutes, everything else should
     be lowered on the SLP level.  The following is closely inspired
     by vect_transform_slp_perm_load and is supposed to eventually
     be replaced.
     ???  As intermediate step do code-gen in the SLP tree representation
     somehow?  */
  std::pair<unsigned, unsigned> first_vec = std::make_pair (-1U, -1U);
  std::pair<unsigned, unsigned> second_vec = std::make_pair (-1U, -1U);
  unsigned int const_nunits = nunits.to_constant ();
  unsigned int index = 0;
  unsigned int mask_element;
  vec_perm_builder mask;
  mask.new_vector (const_nunits, const_nunits, 1);
  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;
  unsigned nperms = 0;
  for (unsigned i = 0; i < vperm.length (); ++i)
    {
      mask_element = vperm[i].second;
      if (first_vec.first == -1U
	  || first_vec == vperm[i].first)
	first_vec = vperm[i].first;
      else if (second_vec.first == -1U
	       || second_vec == vperm[i].first)
	{
	  second_vec = vperm[i].first;
	  mask_element += const_nunits;
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "permutation requires at "
			     "least three vectors");
	  gcc_assert (!gsi);
	  return false;
	}

      mask[index++] = mask_element;

      if (index == count)
	{
	  indices.new_vector (mask, second_vec.first == -1U ? 1 : 2,
			      const_nunits);
	  bool identity_p = indices.series_p (0, 1, 0, 1);
	  if (!identity_p
	      && !can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported vect permute { ");
		  for (i = 0; i < count; ++i)
		    {
		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
		    }
		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		}
	      gcc_assert (!gsi);
	      return false;
	    }

	  if (!identity_p)
	    nperms++;
	  if (gsi)
	    {
	      if (second_vec.first == -1U)
		second_vec = first_vec;

	      /* Generate the permute statement if necessary.  */
	      slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
	      tree first_def
		= vect_get_slp_vect_def (first_node, first_vec.second);
	      gassign *perm_stmt;
	      tree perm_dest = make_ssa_name (vectype);
	      if (!identity_p)
		{
		  slp_tree second_node
		    = SLP_TREE_CHILDREN (node)[second_vec.first];
		  tree second_def
		    = vect_get_slp_vect_def (second_node, second_vec.second);
		  tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
		  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
						   first_def, second_def,
						   mask_vec);
		}
	      else
		/* We need a copy here in case the def was external.  */
		perm_stmt = gimple_build_assign (perm_dest, first_def);
	      vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi);
	      /* Store the vector statement in NODE.  */
	      SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt);
	    }

	  index = 0;
	  first_vec = std::make_pair (-1U, -1U);
	  second_vec = std::make_pair (-1U, -1U);
	}
    }

  if (!gsi)
    record_stmt_cost (cost_vec, nperms, vec_perm, NULL, vectype, 0, vect_body);

  return true;
}

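/* Editorial illustration (a hypothetical sketch, not part of the pass):
   lowering the documented blend [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
   of two 4-lane children to a two-operand VEC_PERM_EXPR mask.  Child 0
   supplies mask elements 0..3 and child 1 elements 4..7, so the blend
   becomes the mask { 0, 5, 2, 7 }.  */
#if 0
struct example_lane_perm { unsigned op, lane; };

static void
example_blend_mask (const example_lane_perm *perm, unsigned n,
		    unsigned nunits, unsigned *mask)
{
  for (unsigned i = 0; i < n; i++)
    mask[i] = perm[i].op * nunits + perm[i].lane;
}
#endif
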
/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (vec_info *vinfo,
			    slp_tree node, slp_instance instance,
			    hash_set<slp_tree> &visited)
{
  gimple_stmt_iterator si;
  int i;
  slp_tree child;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if ((SLP_TREE_DEF_TYPE (node) == vect_internal_def
       && SLP_TREE_VEC_STMTS (node).exists ())
      || SLP_TREE_VEC_DEFS (node).exists ())
    return;

  /* Vectorize externals and constants.  */
  if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
      || SLP_TREE_DEF_TYPE (node) == vect_external_def)
    {
      /* ???  vectorizable_shift can end up using a scalar operand which is
	 currently denoted as !SLP_TREE_VECTYPE.  No need to vectorize the
	 node in this case.  */
      if (!SLP_TREE_VECTYPE (node))
	return;

      vect_create_constant_vectors (vinfo, node);
      return;
    }

  /* ???  If we'd have a way to mark backedges that would be cheaper.  */
  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (vinfo, child, instance, visited);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "------>vectorizing SLP node starting from: %G",
		     stmt_info->stmt);

  if (STMT_VINFO_DATA_REF (stmt_info)
      && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
    {
      /* Vectorized loads go before the first scalar load to make it
	 ready early, vectorized stores go before the last scalar
	 stmt which is where all uses are ready.  */
      stmt_vec_info last_stmt_info = NULL;
      if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	last_stmt_info = vect_find_first_scalar_stmt_in_slp (node);
      else /* DR_IS_WRITE */
	last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
      si = gsi_for_stmt (last_stmt_info->stmt);
    }
  else if ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
	    == cycle_phi_info_type)
	   || (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
	       == induc_vec_info_type))
    {
      /* For reduction and induction PHIs we do not use the
	 insertion iterator.  */
      si = gsi_none ();
    }
  else
    {
      /* Emit other stmts after the children vectorized defs which is
	 earliest possible.  */
      gimple *last_stmt = NULL;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	  {
	    /* For fold-left reductions we are retaining the scalar
	       reduction PHI but we still have SLP_TREE_NUM_VEC_STMTS
	       set so the representation isn't perfect.  Resort to the
	       last scalar def here.  */
	    if (SLP_TREE_VEC_STMTS (child).is_empty ())
	      {
		gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child))
			    == cycle_phi_info_type);
		gphi *phi = as_a <gphi *>
			      (vect_find_last_scalar_stmt_in_slp (child)->stmt);
		if (!last_stmt
		    || vect_stmt_dominates_stmt_p (last_stmt, phi))
		  last_stmt = phi;
	      }
	    /* We are emitting all vectorized stmts in the same place and
	       the last one is the last.
	       ???  Unless we have a load permutation applied and that
	       figures to re-use an earlier generated load.  */
	    unsigned j;
	    gimple *vstmt;
	    FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child), j, vstmt)
	      if (!last_stmt
		  || vect_stmt_dominates_stmt_p (last_stmt, vstmt))
		last_stmt = vstmt;
	  }
	else if (!SLP_TREE_VECTYPE (child))
	  {
	    /* For externals we use unvectorized at all scalar defs.  */
	    unsigned j;
	    tree def;
	    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (child), j, def)
	      if (TREE_CODE (def) == SSA_NAME
		  && !SSA_NAME_IS_DEFAULT_DEF (def))
		{
		  gimple *stmt = SSA_NAME_DEF_STMT (def);
		  if (!last_stmt
		      || vect_stmt_dominates_stmt_p (last_stmt, stmt))
		    last_stmt = stmt;
		}
	  }
	else
	  {
	    /* For externals we have to look at all defs since their
	       insertion place is decided per vector.  But beware
	       of pre-existing vectors where we need to make sure
	       we do not insert before the region boundary.  */
	    if (SLP_TREE_SCALAR_OPS (child).is_empty ()
		&& !vinfo->lookup_def (SLP_TREE_VEC_DEFS (child)[0]))
	      last_stmt = gsi_stmt (gsi_after_labels
				      (as_a <bb_vec_info> (vinfo)->bbs[0]));
	    else
	      {
		unsigned j;
		tree vdef;
		FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child), j, vdef)
		  if (TREE_CODE (vdef) == SSA_NAME
		      && !SSA_NAME_IS_DEFAULT_DEF (vdef))
		    {
		      gimple *vstmt = SSA_NAME_DEF_STMT (vdef);
		      if (!last_stmt
			  || vect_stmt_dominates_stmt_p (last_stmt, vstmt))
			last_stmt = vstmt;
		    }
	      }
	  }
      /* This can happen when all children are pre-existing vectors or
	 constants.  */
      if (!last_stmt)
	last_stmt = vect_find_first_scalar_stmt_in_slp (node)->stmt;
      if (is_a <gphi *> (last_stmt))
	si = gsi_after_labels (gimple_bb (last_stmt));
      else
	{
	  si = gsi_for_stmt (last_stmt);
	  gsi_next (&si);
	}
    }

  bool done_p = false;

  /* Handle purely internal nodes.  */
  if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
    {
      /* ???  the transform kind is stored to STMT_VINFO_TYPE which might
	 be shared with different SLP nodes (but usually it's the same
	 operation apart from the case the stmt is only there for denoting
	 the actual scalar lane defs ...).  So do not call vect_transform_stmt
	 but open-code it here (partly).  */
      bool done = vectorizable_slp_permutation (vinfo, &si, node, NULL);
      gcc_assert (done);
      done_p = true;
    }
  if (!done_p)
    vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (vec_info *vinfo,
			      slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (vinfo, child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
	continue;
      if (is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (vinfo, node, visited);
}

/* Vectorize the instance root.  */

static void
vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
{
  gassign *rstmt = NULL;

  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    {
      gimple *child_stmt;
      int j;

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt)
	{
	  tree vect_lhs = gimple_get_lhs (child_stmt);
	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
					  TREE_TYPE (vect_lhs)))
	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
			       vect_lhs);
	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
	  break;
	}
    }
  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    {
      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      gimple *child_stmt;
      int j;
      vec<constructor_elt, va_gc> *v;
      vec_alloc (v, nelts);

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt)
	CONSTRUCTOR_APPEND_ELT (v,
				NULL_TREE,
				gimple_get_lhs (child_stmt));
      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
      tree r_constructor = build_constructor (rtype, v);
      rstmt = gimple_build_assign (lhs, r_constructor);
    }

  gcc_assert (rstmt);

  gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
  gsi_replace (&rgsi, rstmt, true);
}

/* Generate vector code for SLP_INSTANCES in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo, vec<slp_instance> slp_instances)
{
  slp_instance instance;
  unsigned int i;

  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Vectorizing SLP tree:\n");
	  if (SLP_INSTANCE_ROOT_STMT (instance))
	    dump_printf_loc (MSG_NOTE, vect_location, "Root stmt: %G",
			     SLP_INSTANCE_ROOT_STMT (instance)->stmt);
	  vect_print_slp_graph (MSG_NOTE, vect_location,
				SLP_INSTANCE_TREE (instance));
	}
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (vinfo, node, instance, visited);

      if (SLP_INSTANCE_ROOT_STMT (instance))
	vectorize_slp_instance_root_stmt (node, instance);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* For reductions set the latch values of the vectorized PHIs.  */
      if (instance->reduc_phis
	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
				      (instance->reduc_phis)) != FOLD_LEFT_REDUCTION
	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_REPRESENTATIVE
				      (instance->reduc_phis)) != EXTRACT_LAST_REDUCTION)
	{
	  slp_tree slp_node = root;
	  slp_tree phi_node = instance->reduc_phis;
	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
		      == SLP_TREE_VEC_STMTS (slp_node).length ());
	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
			 vect_get_slp_vect_def (slp_node, j),
			 e, gimple_phi_arg_location (phi, e->dest_idx));
	}

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (vinfo, root);

      /* Remove vectorized stores original scalar stmts.  */
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
	{
	  if (!STMT_VINFO_DATA_REF (store_info)
	      || !DR_IS_WRITE (STMT_VINFO_DATA_REF (store_info)))
	    continue;

	  store_info = vect_orig_stmt (store_info);
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  vinfo->remove_stmt (store_info);
	}
    }
}