/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
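/* For example, in an interleaving chain that accesses a[0], a[1] and a[3],
   the element a[3] is recorded with a DR_GROUP_GAP of 2, so the places
   returned for the three accesses are 0, 1 and 3 respectively.  */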
int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}

/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
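/* For example, when the chosen intermediate vector type has 4 elements,
   the two permutations built below are {0, 4, 1, 5} and {2, 6, 3, 7}:
   the low-half and the high-half interleave of two input vectors.  */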
bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	{
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	  if (vector_type
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	    {
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check
   that they are of a valid type and that they match the defs of the first
   stmt of the SLP group (stored in OPRNDS_INFO).  This function tries to
   match stmts by swapping operands of STMTS[STMT_NUM] when possible.
   Non-zero *SWAP indicates a swap is required for cond_expr stmts.
   Specifically, *SWAP is 1 if STMT is a cond and the operands of the
   comparison need to be swapped; *SWAP is 2 if STMT is a cond and the
   code of the comparison needs to be inverted.  If there is any operand
   swap in this function, *SWAP is set to a non-zero value.
   If there was a fatal error return -1; if the error could be corrected
   by swapping operands of the parent node of this one, return 1; if
   everything is OK return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	    {
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
	      first_op_idx = 5;
	    }
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first
	 stmt.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
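	  /* maps[0] is the identity; maps[1] (*swap == 1) exchanges the
	     two operands of the comparison; maps[2] (*swap == 2)
	     exchanges the then and else arms of the COND_EXPR.  */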
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

      if (first)
	{
	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in an SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	      && def_stmt_info)
	    dt = vect_reduction_def;
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     end in the reduction chain.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && !STMT_VINFO_DATA_REF (stmt_info)
		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		    && def_stmt_info
		    && !STMT_VINFO_DATA_REF (def_stmt_info)
		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type)
	      || (!STMT_VINFO_DATA_REF (stmt_info)
		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		  && ((!def_stmt_info
		       || STMT_VINFO_DATA_REF (def_stmt_info)
		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		      != (oprnd_info->first_dt != vect_reduction_def))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "trying swapped operands\n");
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
						      type)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_external_def:
	  /* Make sure to demote the overall operand to external.  */
	  oprnd_info->first_dt = vect_external_def;
	  /* Fallthru.  */
	case vect_constant_def:
	  oprnd_info->def_stmts.quick_push (NULL);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	case vect_internal_def:
	case vect_reduction_def:
	  if (oprnd_info->first_dt == vect_reduction_def
	      && !STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && !STMT_VINFO_DATA_REF (def_stmt_info)
	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	    {
	      /* For an SLP reduction chain we want to duplicate the
		 reduction to each of the chain members.  That gets
		 us a sane SLP graph (still the stmts are not 100%
		 correct wrt the initial values).  */
	      gcc_assert (!first);
	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
	      break;
	    }
	  /* Fallthru.  */
	case vect_induction_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
	{
	  /* If there are already uses of this stmt in an SLP instance then
	     we've committed to the operand order and can't swap it.  */
	  if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: cannot swap operands of "
				 "shared stmt %G", stmt_info->stmt);
	      return -1;
	    }

	  /* To get rid of this swapping we have to move the stmt code
	     to the SLP tree as well (and gather it here per stmt).  */
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
		STMT_VINFO_REDUC_IDX (stmt_info) = 2;
	      else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
		STMT_VINFO_REDUC_IDX (stmt_info) = 1;
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	{
	  /* Commutative ops need not reflect swapping, ops are in
	     the SLP tree.  */
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
	    || is_pattern_stmt_p (member_info))
	  break;

      if (!member_info)
	{
	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
	  return true;
	}
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
	   && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  new vector type: %T\n", vectype);
    }
  return false;
}

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
				       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */
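/* For example, two calls to the same internal function with compatible
   lhs and argument types can be grouped, while a mix of sqrt and sqrtf
   calls cannot: their combined function codes already differ.  */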
static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
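/* For example, for the two-statement group { a0 + b0, a1 - b1 } and a
   4-element vector type this asks for the blend {0, 5, 2, 7}: even
   elements come from the vector of PLUS results and odd elements from
   the vector of MINUS results.  */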
static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation, and are of supported types of operation.  Return true if
   so; otherwise return false and indicate in *MATCHES which stmts are not
   isomorphic to the first one.  If MATCHES[0] is false then this indicates
   the comparison could not be carried out or the stmts will never be
   vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of the comparison; set
   SWAP[i] to 2 if stmt I is isomorphic to the first stmt by inverting the
   code of the comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can
   be swapped to B1 <= A1 ? X1 : Y1, or be inverted to A1 < B1 ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      vec_info *vinfo = stmt_info->vinfo;
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype, group_size)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
	  && !vect_update_shared_vectype (stmt_info, vectype))
	continue;

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
	{
	  rhs_code = CALL_EXPR;

	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	    load_p = true;
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			(gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	{
	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
	}

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	  else if (call_stmt
		   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_call_arg (call_stmt, 1);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds)
	    {
	      tree other_op1 = (call_stmt
				? gimple_call_arg (call_stmt, 1)
				: gimple_assign_rhs2 (stmt));
	      if (!operand_equal_p (first_op1, other_op1, 0))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different shift "
				     "arguments in %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }

	  if (!load_p && rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (load_p)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (!vect_two_operations_perm_ok_p (stmts, group_size,
					  vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G",
				     first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);
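
/* Build and return an SLP tree for the stmt group STMTS, re-using an
   already-built tree from BST_MAP when the same stmt set has been seen
   before.  Both successes and failures (NULL) are recorded in BST_MAP,
   so a stmt set that is known not to build is rejected without being
   re-analyzed.  The remaining parameters are as for
   vect_build_slp_tree_2.  */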
static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	{
	  (*leader)->refcnt++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
	}
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					&this_max_nunits,
					matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}

/* Recursively build an SLP tree for the stmt group STMTS.  Fail and
   return NULL if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  MATCHES
   indicates which stmts failed to match the first one.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
	      return NULL;
	}
      else
	return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	{
	  /* Masked load.  */
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
	  nops = 1;
	}
      else
	{
	  *max_nunits = this_max_nunits;
	  (*tree_size)++;
	  node = vect_create_new_slp_node (stmts);
	  /* And compute the load permutation.  Whether it is actually
	     a permutation depends on the unrolling factor which is
	     decided later.  */
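	  /* For example, if the interleaving chain starts at a[0] and the
	     node's scalar stmts load a[1], a[0], a[3] and a[2], the
	     recorded load permutation is {1, 0, 3, 2}.  */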
	  vec<unsigned> load_permutation;
	  int j;
	  stmt_vec_info load_info;
	  load_permutation.create (group_size);
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
		  (load_info, first_stmt_info);
	      gcc_assert (load_place != -1);
	      load_permutation.safe_push (load_place);
	    }
	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
	  return node;
	}
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
	{
	  /* COND_EXPRs eventually have one operand too many if the
	     condition is an SSA name.  */
	  gcc_assert (i == 3 && nops == 4);
	  continue;
	}

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	{
	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
	  oprnd_info->ops = vNULL;
	  children.safe_push (invnode);
	  continue;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					matches, npermutes,
					&this_tree_size, bst_map)) != NULL)
	{
	  /* If we have all children of a non-unary child built up from
	     scalars then just throw that away and build it up this node
	     from scalars.  */
	  if (is_a <bb_vec_info> (vinfo)
	      && SLP_TREE_CHILDREN (child).length () > 1
	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !oprnd_info->any_pattern)
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		  break;
	      if (!grandchild
		  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
		{
		  /* Roll back.  */
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Building parent vector operands from "
				     "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		  oprnd_info->ops = vNULL;
		  ++this_tree_size;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info)
	  && !oprnd_info->any_pattern
	  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building vector operands from scalars\n");
	  this_tree_size++;
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
	  children.safe_push (child);
	  oprnd_info->ops = vNULL;
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commuted try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Swapping operands for reductions breaks assumptions later on.  */
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
	  /* Do so only if the number of not successful permutes was not
	     more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		std::swap (oprnds_info[0]->ops[j],
			   oprnds_info[1]->ops[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	      }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    tem, npermutes,
					    &this_tree_size, bst_map)) != NULL)
	    {
	      /* If we have all children of a non-unary child built up from
		 scalars then just throw that away and build it up this node
		 from scalars.  */
	      if (is_a <bb_vec_info> (vinfo)
		  && SLP_TREE_CHILDREN (child).length () > 1
		  /* ???  Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !oprnd_info->any_pattern)
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		      break;
		  if (!grandchild
		      && (vect_update_all_shared_vectypes
			  (oprnd_info->def_stmts)))
		    {
		      /* Roll back.  */
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      if (dump_enabled_p ())
			dump_printf_loc (MSG_NOTE, vect_location,
					 "Building parent vector operands from "
					 "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		      oprnd_info->ops = vNULL;
		      ++this_tree_size;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node, hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;
  tree op;

  if (visited.add (node))
    return;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
		   SLP_TREE_DEF_TYPE (node) == vect_external_def
		   ? " (external)"
		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
		      ? " (constant)"
		      : ""), node,
		   estimated_poly_value (node->max_nunits));
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
  else
    {
      dump_printf_loc (metadata, user_loc, "\t{ ");
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	dump_printf (metadata, "%T%s ", op,
		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
      dump_printf (metadata, "}\n");
    }
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child, visited);
}

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_print_slp_tree (dump_kind, loc, node, visited);
}

/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */
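/* The permutation scatters: the stmt currently at position I is moved
   to position PERMUTATION[I] in every node of the tree.  For example,
   permutation {1, 0, 3, 2} swaps each pair of scalar stmts (and scalar
   ops).  */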
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation,
			  hash_set<slp_tree> &visited)
{
  unsigned int i;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation, visited);

  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    {
      gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
      vec<stmt_vec_info> tmp_stmts;
      tmp_stmts.create (group_size);
      tmp_stmts.quick_grow (group_size);
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	tmp_stmts[permutation[i]] = stmt_info;
      SLP_TREE_SCALAR_STMTS (node).release ();
      SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
    }
  if (SLP_TREE_SCALAR_OPS (node).exists ())
    {
      gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
      vec<tree> tmp_ops;
      tmp_ops.create (group_size);
      tmp_ops.quick_grow (group_size);
      tree op;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	tmp_ops[permutation[i]] = op;
      SLP_TREE_SCALAR_OPS (node).release ();
      SLP_TREE_SCALAR_OPS (node) = tmp_ops;
    }
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
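/* For example, if every load node of a reduction instance uses the
   permutation {3, 2, 1, 0}, the scalar stmts of all nodes are rearranged
   in that order and the load permutations can then be dropped (or kept
   as the identity when gaps must still be handled), making the loads
   effectively contiguous.  */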
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  hash_set<slp_tree> visited;
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation, visited);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
	      && DR_GROUP_GAP (first_stmt_info) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node,
		       hash_set<slp_tree> &visited)
{
  if (visited.add (node))
    return;

  if (SLP_TREE_CHILDREN (node).length () == 0)
    {
      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
	return;
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	SLP_INSTANCE_LOADS (inst).safe_push (node);
    }
  else
    {
      unsigned i;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
	vect_gather_slp_loads (inst, child, visited);
    }
}

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (inst, node, visited);
}

1901 /* Check if the required load permutations in the SLP instance
1902 SLP_INSTN are supported. */
1904 static bool
1905 vect_supported_load_permutation_p (slp_instance slp_instn)
1907 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1908 unsigned int i, j, k, next;
1909 slp_tree node;
1911 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1914 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1915 if (node->load_permutation.exists ())
1916 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1917 dump_printf (MSG_NOTE, "%d ", next);
1918 else
1919 for (k = 0; k < group_size; ++k)
1920 dump_printf (MSG_NOTE, "%d ", k);
1921 dump_printf (MSG_NOTE, "\n");
1924 /* In the case of a reduction every load permutation is allowed, since the order
1925 of the reduction statements is not important (as opposed to the case of
1926 grouped stores). The only condition we need to check is that all the
1927 load nodes are of the same size and have the same permutation (and then
1928 rearrange all the nodes of the SLP instance according to this
1929 permutation). */
1931 /* Check that all the load nodes are of the same size. */
1932 /* ??? Can't we assert this? */
1933 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1934 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1935 return false;
1937 node = SLP_INSTANCE_TREE (slp_instn);
1938 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1940 /* Reduction (there are no data-refs in the root).
1941 In a reduction chain the order of the loads is not important. */
1942 if (!STMT_VINFO_DATA_REF (stmt_info)
1943 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1944 vect_attempt_slp_rearrange_stmts (slp_instn);
1946 /* In basic block vectorization we allow any subchain of an interleaving
1947 chain.
1948 FORNOW: not supported in loop SLP because of realignment complications. */
1949 if (STMT_VINFO_BB_VINFO (stmt_info))
1951 /* Check whether the loads in an instance form a subchain and thus
1952 no permutation is necessary. */
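/* E.g., an SLP node loading a[4], a[5], a[6] and a[7] from an
   interleaving chain a[0] ... a[7] accesses consecutive elements in
   order, so its load permutation can simply be dropped.  */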
1953 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1955 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1956 continue;
1957 bool subchain_p = true;
1958 stmt_vec_info next_load_info = NULL;
1959 stmt_vec_info load_info;
1960 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1962 if (j != 0
1963 && (next_load_info != load_info
1964 || DR_GROUP_GAP (load_info) != 1))
1966 subchain_p = false;
1967 break;
1969 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1971 if (subchain_p)
1972 SLP_TREE_LOAD_PERMUTATION (node).release ();
1973 else
1975 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1976 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1977 unsigned HOST_WIDE_INT nunits;
1978 unsigned k, maxk = 0;
1979 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1980 if (k > maxk)
1981 maxk = k;
1982 /* In BB vectorization we may not actually use a loaded vector
1983 accessing elements in excess of DR_GROUP_SIZE. */
1984 tree vectype = STMT_VINFO_VECTYPE (group_info);
1985 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1986 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1988 if (dump_enabled_p ())
1989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1990 "BB vectorization with gaps at the end of "
1991 "a load is not supported\n");
1992 return false;
1995 /* Verify the permutation can be generated. */
1996 vec<tree> tem;
1997 unsigned n_perms;
1998 if (!vect_transform_slp_perm_load (node, tem, NULL,
1999 1, slp_instn, true, &n_perms))
2001 if (dump_enabled_p ())
2002 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2003 vect_location,
2004 "unsupported load permutation\n");
2005 return false;
2009 return true;
2012 /* For loop vectorization verify we can generate the permutation. Be
2013 conservative about the vectorization factor, there are permutations
2014 that will use three vector inputs only starting from a specific factor
2015 and the vectorization factor is not yet final.
2016 ??? The SLP instance unrolling factor might not be the maximum one. */
2017 unsigned n_perms;
2018 poly_uint64 test_vf
2019 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
2020 LOOP_VINFO_VECT_FACTOR
2021 (STMT_VINFO_LOOP_VINFO (stmt_info)));
2022 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2023 if (node->load_permutation.exists ()
2024 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
2025 slp_instn, true, &n_perms))
2026 return false;
2028 return true;
2032 /* Find the last scalar stmt in the SLP node NODE. */
2034 stmt_vec_info
2035 vect_find_last_scalar_stmt_in_slp (slp_tree node)
2037 stmt_vec_info last = NULL;
2038 stmt_vec_info stmt_vinfo;
2040 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2042 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
2043 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2046 return last;
2049 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2050 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2051 (also containing the first GROUP1_SIZE stmts, since stores are
2052 consecutive), the second containing the remainder.
2053 Return the first stmt in the second group. */
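/* For example, splitting a group of eight consecutive stores with
   GROUP1_SIZE == 4 yields two groups of four stmts; the gap of the
   second group is increased by 4 to skip over the first group, and
   the gap of the first group by 4 to skip over the second.  */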
2055 static stmt_vec_info
2056 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
2058 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
2059 gcc_assert (group1_size > 0);
2060 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
2061 gcc_assert (group2_size > 0);
2062 DR_GROUP_SIZE (first_vinfo) = group1_size;
2064 stmt_vec_info stmt_info = first_vinfo;
2065 for (unsigned i = group1_size; i > 1; i--)
2067 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2068 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2070 /* STMT_INFO is now the last element of the first group. */
2071 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
2072 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
2074 DR_GROUP_SIZE (group2) = group2_size;
2075 for (stmt_info = group2; stmt_info;
2076 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
2078 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
2079 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2082 /* For the second group, the DR_GROUP_GAP is that before the original group,
2083 plus skipping over the first group. */
2084 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
2086 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2087 DR_GROUP_GAP (first_vinfo) += group2_size;
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
2091 group1_size, group2_size);
2093 return group2;
2096 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2097 statements and a vector of NUNITS elements. */
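/* For example, with NUNITS == 4 and GROUP_SIZE == 6 the least common
   multiple is 12, so the SLP instance needs to be unrolled twice.  */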
2099 static poly_uint64
2100 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2102 return exact_div (common_multiple (nunits, group_size), group_size);
2105 /* Analyze an SLP instance starting from a group of grouped stores. Call
2106 vect_build_slp_tree to build a tree of packed stmts if possible.
2107 Return FALSE if it's impossible to SLP any stmt in the loop. */
2109 static bool
2110 vect_analyze_slp_instance (vec_info *vinfo,
2111 scalar_stmts_to_slp_tree_map_t *bst_map,
2112 stmt_vec_info stmt_info, unsigned max_tree_size)
2114 slp_instance new_instance;
2115 slp_tree node;
2116 unsigned int group_size;
2117 tree vectype, scalar_type = NULL_TREE;
2118 unsigned int i;
2119 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2120 vec<stmt_vec_info> scalar_stmts;
2121 bool constructor = false;
2123 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2125 scalar_type = TREE_TYPE (DR_REF (dr));
2126 group_size = DR_GROUP_SIZE (stmt_info);
2127 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2129 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2131 gcc_assert (is_a <loop_vec_info> (vinfo));
2132 vectype = STMT_VINFO_VECTYPE (stmt_info);
2133 group_size = REDUC_GROUP_SIZE (stmt_info);
2135 else if (is_gimple_assign (stmt_info->stmt)
2136 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2138 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2139 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2140 constructor = true;
2142 else
2144 gcc_assert (is_a <loop_vec_info> (vinfo));
2145 vectype = STMT_VINFO_VECTYPE (stmt_info);
2146 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2149 if (!vectype)
2151 if (dump_enabled_p ())
2152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2153 "Build SLP failed: unsupported data-type %T\n",
2154 scalar_type);
2156 return false;
2158 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2160 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2161 scalar_stmts.create (group_size);
2162 stmt_vec_info next_info = stmt_info;
2163 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2165 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2166 while (next_info)
2168 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2169 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2172 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2174 /* Collect the reduction stmts and store them in
2175 SLP_TREE_SCALAR_STMTS. */
2176 while (next_info)
2178 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2179 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2181 /* Mark the first element of the reduction chain as reduction to properly
2182 transform the node. In the reduction analysis phase only the last
2183 element of the chain is marked as reduction. */
2184 STMT_VINFO_DEF_TYPE (stmt_info)
2185 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2186 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2187 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2189 else if (constructor)
2191 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2192 tree val;
2193 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2195 if (TREE_CODE (val) == SSA_NAME)
2197 gimple* def = SSA_NAME_DEF_STMT (val);
2198 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2199 /* Value is defined in another basic block. */
2200 if (!def_info)
2201 return false;
2202 scalar_stmts.safe_push (def_info);
2204 else
2205 return false;
2207 if (dump_enabled_p ())
2208 dump_printf_loc (MSG_NOTE, vect_location,
2209 "Analyzing vectorizable constructor: %G\n",
2210 stmt_info->stmt);
2212 else
2214 /* Collect reduction statements. */
2215 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2216 for (i = 0; reductions.iterate (i, &next_info); i++)
2217 scalar_stmts.safe_push (next_info);
2220 /* Build the tree for the SLP instance. */
2221 bool *matches = XALLOCAVEC (bool, group_size);
2222 unsigned npermutes = 0;
2223 poly_uint64 max_nunits = nunits;
2224 unsigned tree_size = 0;
2225 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2226 &max_nunits, matches, &npermutes,
2227 &tree_size, bst_map);
2228 if (node != NULL)
2230 /* Calculate the unrolling factor based on the smallest type. */
2231 poly_uint64 unrolling_factor
2232 = calculate_unrolling_factor (max_nunits, group_size);
2234 if (maybe_ne (unrolling_factor, 1U)
2235 && is_a <bb_vec_info> (vinfo))
2237 unsigned HOST_WIDE_INT const_max_nunits;
2238 if (!max_nunits.is_constant (&const_max_nunits)
2239 || const_max_nunits > group_size)
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "Build SLP failed: store group "
2244 "size not a multiple of the vector size "
2245 "in basic block SLP\n");
2246 vect_free_slp_tree (node, false);
2247 return false;
2249 /* Fatal mismatch. */
2250 matches[group_size / const_max_nunits * const_max_nunits] = false;
2251 vect_free_slp_tree (node, false);
2253 else
2255 /* Create a new SLP instance. */
2256 new_instance = XNEW (class _slp_instance);
2257 SLP_INSTANCE_TREE (new_instance) = node;
2258 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2259 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2260 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2261 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2263 vect_gather_slp_loads (new_instance, node);
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_NOTE, vect_location,
2266 "SLP size %u vs. limit %u.\n",
2267 tree_size, max_tree_size);
2269 /* Compute the load permutation. */
2270 slp_tree load_node;
2271 bool loads_permuted = false;
2272 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2274 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2275 continue;
2276 unsigned j;
2277 stmt_vec_info load_info;
2278 bool this_load_permuted = false;
2279 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2280 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2281 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2282 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2284 this_load_permuted = true;
2285 break;
2287 if (!this_load_permuted
2288 /* The load requires permutation when unrolling exposes
2289 a gap either because the group is larger than the SLP
2290 group-size or because there is a gap between the groups. */
2291 && (known_eq (unrolling_factor, 1U)
2292 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2293 && DR_GROUP_GAP (first_stmt_info) == 0)))
2295 SLP_TREE_LOAD_PERMUTATION (load_node).release ();
2296 continue;
2298 loads_permuted = true;
2301 if (loads_permuted)
2303 if (!vect_supported_load_permutation_p (new_instance))
2305 if (dump_enabled_p ())
2306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2307 "Build SLP failed: unsupported load "
2308 "permutation %G", stmt_info->stmt);
2309 vect_free_slp_instance (new_instance, false);
2310 return false;
2314 /* If the loads and stores can be handled with load/store-lanes
2315 instructions, do not generate this SLP instance. */
2316 if (is_a <loop_vec_info> (vinfo)
2317 && loads_permuted
2318 && dr && vect_store_lanes_supported (vectype, group_size, false))
2320 slp_tree load_node;
2321 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2323 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2324 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2325 /* Use SLP for strided accesses (or if we can't use load-lanes). */
2326 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2327 || ! vect_load_lanes_supported
2328 (STMT_VINFO_VECTYPE (stmt_vinfo),
2329 DR_GROUP_SIZE (stmt_vinfo), false))
2330 break;
2332 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2334 if (dump_enabled_p ())
2335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2336 "Built SLP cancelled: can use "
2337 "load/store-lanes\n");
2338 vect_free_slp_instance (new_instance, false);
2339 return false;
2343 /* If this is a reduction chain with a conversion in front
2344 amend the SLP tree with a node for that. */
2345 if (!dr
2346 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2347 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2349 /* Get at the conversion stmt - we know it's the single use
2350 of the last stmt of the reduction chain. */
2351 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2352 use_operand_p use_p;
2353 gimple *use_stmt;
2354 bool r = single_imm_use (gimple_assign_lhs (tem),
2355 &use_p, &use_stmt);
2356 gcc_assert (r);
2357 next_info = vinfo->lookup_stmt (use_stmt);
2358 next_info = vect_stmt_to_vectorize (next_info);
2359 scalar_stmts = vNULL;
2360 scalar_stmts.create (group_size);
2361 for (unsigned i = 0; i < group_size; ++i)
2362 scalar_stmts.quick_push (next_info);
2363 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2364 SLP_TREE_CHILDREN (conv).quick_push (node);
2365 SLP_INSTANCE_TREE (new_instance) = conv;
2366 /* We also have to fake this conversion stmt as SLP reduction
2367 group so we don't have to mess with too much code
2368 elsewhere. */
2369 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2370 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2373 vinfo->slp_instances.safe_push (new_instance);
2375 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_NOTE, vect_location,
2378 "Final SLP tree for instance:\n");
2379 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2382 return true;
2385 else
2387 /* Failed to SLP. */
2388 /* Free the allocated memory. */
2389 scalar_stmts.release ();
2392 /* For basic block SLP, try to break the group up into multiples of the
2393 vector size. */
2394 unsigned HOST_WIDE_INT const_nunits;
2395 if (is_a <bb_vec_info> (vinfo)
2396 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2397 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2398 && nunits.is_constant (&const_nunits))
2400 /* We consider breaking the group only on VF boundaries from the existing
2401 start. */
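/* E.g., with CONST_NUNITS == 4 and the first mismatch at i == 6 in a
   group of eight stores, the group is split into two groups of four
   and the second group, which contains the mismatch, is not
   re-analyzed.  */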
2402 for (i = 0; i < group_size; i++)
2403 if (!matches[i]) break;
2405 if (i >= const_nunits && i < group_size)
2407 /* Split into two groups at the first vector boundary before i. */
2408 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2409 unsigned group1_size = i & ~(const_nunits - 1);
2411 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2412 group1_size);
2413 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2414 max_tree_size);
2415 /* If the first non-match was in the middle of a vector,
2416 skip the rest of that vector. */
2417 if (group1_size < i)
2419 i = group1_size + const_nunits;
2420 if (i < group_size)
2421 rest = vect_split_slp_store_group (rest, const_nunits);
2423 if (i < group_size)
2424 res |= vect_analyze_slp_instance (vinfo, bst_map,
2425 rest, max_tree_size);
2426 return res;
2428 /* Even though the first vector did not all match, we might be able to SLP
2429 (some) of the remainder. FORNOW ignore this possibility. */
2432 return false;
2436 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2437 trees of packed scalar stmts if SLP is possible. */
2439 opt_result
2440 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2442 unsigned int i;
2443 stmt_vec_info first_element;
2445 DUMP_VECT_SCOPE ("vect_analyze_slp");
2447 scalar_stmts_to_slp_tree_map_t *bst_map
2448 = new scalar_stmts_to_slp_tree_map_t ();
2450 /* Find SLP sequences starting from groups of grouped stores. */
2451 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2452 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2454 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2456 if (loop_vinfo->reduction_chains.length () > 0)
2458 /* Find SLP sequences starting from reduction chains. */
2459 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2460 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2461 max_tree_size))
2463 /* Dissolve reduction chain group. */
2464 stmt_vec_info vinfo = first_element;
2465 stmt_vec_info last = NULL;
2466 while (vinfo)
2468 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2469 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2470 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2471 last = vinfo;
2472 vinfo = next;
2474 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2475 /* It can be still vectorized as part of an SLP reduction. */
2476 loop_vinfo->reductions.safe_push (last);
2480 /* Find SLP sequences starting from groups of reductions. */
2481 if (loop_vinfo->reductions.length () > 1)
2482 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2483 max_tree_size);
2486 /* The map keeps a reference on SLP nodes built, release that. */
2487 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2488 it != bst_map->end (); ++it)
2489 if ((*it).second)
2490 vect_free_slp_tree ((*it).second, false);
2491 delete bst_map;
2493 return opt_result::success ();
2497 /* For each possible SLP instance decide whether to SLP it and calculate overall
2498 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2499 least one instance. */
2501 bool
2502 vect_make_slp_decision (loop_vec_info loop_vinfo)
2504 unsigned int i;
2505 poly_uint64 unrolling_factor = 1;
2506 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2507 slp_instance instance;
2508 int decided_to_slp = 0;
2510 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2512 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2514 /* FORNOW: SLP if you can. */
2515 /* All unroll factors have the form:
2517 GET_MODE_SIZE (vinfo->vector_mode) * X
2519 for some rational X, so they must have a common multiple. */
2520 unrolling_factor
2521 = force_common_multiple (unrolling_factor,
2522 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2524 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2525 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2526 loop-based vectorization. Such stmts will be marked as HYBRID. */
2527 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2528 decided_to_slp++;
2531 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2533 if (decided_to_slp && dump_enabled_p ())
2535 dump_printf_loc (MSG_NOTE, vect_location,
2536 "Decided to SLP %d instances. Unrolling factor ",
2537 decided_to_slp);
2538 dump_dec (MSG_NOTE, unrolling_factor);
2539 dump_printf (MSG_NOTE, "\n");
2542 return (decided_to_slp > 0);
2546 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2547 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
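/* For example, a pure-SLP stmt whose scalar def is also used by a stmt
   that is only loop-vectorized must be vectorized by both the SLP and
   the loop-based scheme, so it is marked hybrid.  */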
2549 static void
2550 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2551 hash_map<slp_tree, unsigned> &visited)
2553 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2554 imm_use_iterator imm_iter;
2555 gimple *use_stmt;
2556 stmt_vec_info use_vinfo;
2557 slp_tree child;
2558 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2559 int j;
2561 /* We need to union stype over the incoming graph edges but we still
2562 want to limit recursion to stay O(N+E). */
2563 unsigned visited_cnt = ++visited.get_or_insert (node);
2564 gcc_assert (visited_cnt <= node->refcnt);
2565 bool only_edge = (visited_cnt != node->refcnt);
2567 /* Propagate hybrid down the SLP tree. */
2568 if (stype == hybrid)
2570 else if (HYBRID_SLP_STMT (stmt_vinfo))
2571 stype = hybrid;
2572 else if (!only_edge)
2574 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2575 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2576 /* If we get a pattern stmt here we have to use the LHS of the
2577 original stmt for immediate uses. */
2578 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2579 tree def;
2580 if (gimple_code (stmt) == GIMPLE_PHI)
2581 def = gimple_phi_result (stmt);
2582 else
2583 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2584 if (def)
2585 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2587 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2588 if (!use_vinfo)
2589 continue;
2590 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2591 if (!STMT_SLP_TYPE (use_vinfo)
2592 && (STMT_VINFO_RELEVANT (use_vinfo)
2593 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2594 && !(gimple_code (use_stmt) == GIMPLE_PHI
2595 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2599 "def in non-SLP stmt: %G", use_stmt);
2600 stype = hybrid;
2605 if (stype == hybrid
2606 && !HYBRID_SLP_STMT (stmt_vinfo))
2608 if (dump_enabled_p ())
2609 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2610 stmt_vinfo->stmt);
2611 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2614 if (!only_edge)
2615 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2616 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2617 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
2618 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2621 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2623 static tree
2624 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2626 walk_stmt_info *wi = (walk_stmt_info *)data;
2627 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2629 if (wi->is_lhs)
2630 return NULL_TREE;
2632 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2633 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2637 def_stmt_info->stmt);
2638 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2641 return NULL_TREE;
2644 static tree
2645 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2646 walk_stmt_info *wi)
2648 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2649 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2650 /* If the stmt is in a SLP instance then this isn't a reason
2651 to mark use definitions in other SLP instances as hybrid. */
2652 if (! STMT_SLP_TYPE (use_vinfo)
2653 && (STMT_VINFO_RELEVANT (use_vinfo)
2654 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2655 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2656 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2658 else
2659 *handled = true;
2660 return NULL_TREE;
2663 /* Find stmts that must be both vectorized and SLPed. */
2665 void
2666 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2668 unsigned int i;
2669 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2670 slp_instance instance;
2672 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2674 /* First walk all pattern stmts in the loop and mark defs of uses as
2675 hybrid because immediate uses in them are not recorded. */
2676 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2678 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2679 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2680 gsi_next (&gsi))
2682 gimple *stmt = gsi_stmt (gsi);
2683 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2684 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2686 walk_stmt_info wi;
2687 memset (&wi, 0, sizeof (wi));
2688 wi.info = loop_vinfo;
2689 gimple_stmt_iterator gsi2
2690 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2691 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2692 vect_detect_hybrid_slp_1, &wi);
2693 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2694 vect_detect_hybrid_slp_2,
2695 vect_detect_hybrid_slp_1, &wi);
2700 /* Then walk the SLP instance trees marking stmts with uses in
2701 non-SLP stmts as hybrid, also propagating hybrid down the
2702 SLP tree, collecting the above info on-the-fly. */
2703 for (unsigned j = 0;; ++j)
2705 hash_map<slp_tree, unsigned> visited;
2706 bool any = false;
2707 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2708 if (j < SLP_INSTANCE_GROUP_SIZE (instance))
2710 any = true;
2711 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2712 j, pure_slp, visited);
2714 if (!any)
2715 break;
2720 /* Initialize a bb_vec_info struct for the statements between
2721 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2723 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2724 gimple_stmt_iterator region_end_in,
2725 vec_info_shared *shared)
2726 : vec_info (vec_info::bb, init_cost (NULL), shared),
2727 bb (gsi_bb (region_begin_in)),
2728 region_begin (region_begin_in),
2729 region_end (region_end_in)
2731 gimple_stmt_iterator gsi;
2733 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2734 gsi_next (&gsi))
2736 gimple *stmt = gsi_stmt (gsi);
2737 gimple_set_uid (stmt, 0);
2738 add_stmt (stmt);
2741 bb->aux = this;
2745 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2746 stmts in the basic block. */
2748 _bb_vec_info::~_bb_vec_info ()
2750 for (gimple_stmt_iterator si = region_begin;
2751 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2752 /* Reset region marker. */
2753 gimple_set_uid (gsi_stmt (si), -1);
2755 bb->aux = NULL;
2758 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2759 given that child nodes have already been processed, and that
2760 their def types currently match their SLP node's def type. */
2762 static bool
2763 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2764 slp_instance node_instance,
2765 stmt_vector_for_cost *cost_vec)
2767 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2768 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2770 /* Calculate the number of vector statements to be created for the
2771 scalar stmts in this node. For SLP reductions it is equal to the
2772 number of vector statements in the children (which has already been
2773 calculated by the recursive call). Otherwise it is the number of
2774 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2775 VF divided by the number of elements in a vector. */
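/* E.g., with VF == 4, DR_GROUP_SIZE == 2 and four-element vectors,
   4 * 2 / 4 == 2 vector statements are needed.  */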
2776 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2777 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2779 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2780 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2782 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2783 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2784 break;
2787 else
2789 poly_uint64 vf;
2790 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2791 vf = loop_vinfo->vectorization_factor;
2792 else
2793 vf = 1;
2794 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2795 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2796 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2797 = vect_get_num_vectors (vf * group_size, vectype);
2800 bool dummy;
2801 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2804 /* Try to build NODE from scalars, returning true on success.
2805 NODE_INSTANCE is the SLP instance that contains NODE. */
2807 static bool
2808 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2809 slp_instance node_instance)
2811 stmt_vec_info stmt_info;
2812 unsigned int i;
2814 if (!is_a <bb_vec_info> (vinfo)
2815 || node == SLP_INSTANCE_TREE (node_instance)
2816 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2817 return false;
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_NOTE, vect_location,
2821 "Building vector operands from scalars instead\n");
2823 /* Don't remove and free the child nodes here, since they could be
2824 referenced by other structures. The analysis and scheduling phases
2825 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2826 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2827 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2828 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2829 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2831 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2832 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2834 return true;
2837 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2838 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2840 Return true if the operations are supported. */
2842 static bool
2843 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2844 slp_instance node_instance,
2845 hash_set<slp_tree> &visited,
2846 hash_set<slp_tree> &lvisited,
2847 stmt_vector_for_cost *cost_vec)
2849 int i, j;
2850 slp_tree child;
2852 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2853 return true;
2855 /* If we already analyzed the exact same set of scalar stmts we're done.
2856 We share the generated vector stmts for those.
2857 The SLP graph is acyclic so not caching whether we failed or succeeded
2858 doesn't result in any issue since we throw away the lvisited set
2859 when we fail. */
2860 if (visited.contains (node)
2861 || lvisited.add (node))
2862 return true;
2864 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2865 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2866 visited, lvisited, cost_vec))
2867 return false;
2869 /* ??? We have to catch the case late where the same first scalar stmt
2870 appears in multiple SLP children with different def types and fail. Remember
2871 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2872 match it when that is vect_internal_def. */
2873 auto_vec<vect_def_type, 4> dt;
2874 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2875 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2876 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2877 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2879 /* Push SLP node def-type to stmt operands. */
2880 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2881 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2882 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
2883 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2884 = SLP_TREE_DEF_TYPE (child);
2886 /* Check everything worked out. */
2887 bool res = true;
2888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2889 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2891 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2893 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2894 != SLP_TREE_DEF_TYPE (child))
2895 res = false;
2897 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2898 != dt[j])
2899 res = false;
2901 if (!res && dump_enabled_p ())
2902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2903 "not vectorized: same operand with different "
2904 "def type in stmt.\n");
2906 if (res)
2907 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2908 cost_vec);
2910 /* Restore def-types. */
2911 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2912 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2913 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2915 /* If this node can't be vectorized, try pruning the tree here rather
2916 than felling the whole thing. */
2917 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2918 res = true;
2920 return res;
2924 /* Analyze statements in SLP instances of VINFO. Return true if the
2925 operations are supported. */
2927 bool
2928 vect_slp_analyze_operations (vec_info *vinfo)
2930 slp_instance instance;
2931 int i;
2933 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2935 hash_set<slp_tree> visited;
2936 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2938 hash_set<slp_tree> lvisited;
2939 stmt_vector_for_cost cost_vec;
2940 cost_vec.create (2);
2941 if (!vect_slp_analyze_node_operations (vinfo,
2942 SLP_INSTANCE_TREE (instance),
2943 instance, visited, lvisited,
2944 &cost_vec)
2945 /* Instances with a root stmt require vectorized defs for the
2946 SLP tree root. */
2947 || (SLP_INSTANCE_ROOT_STMT (instance)
2948 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2949 != vect_internal_def)))
2951 slp_tree node = SLP_INSTANCE_TREE (instance);
2952 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2953 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_NOTE, vect_location,
2955 "removing SLP instance operations starting from: %G",
2956 stmt_info->stmt);
2957 vect_free_slp_instance (instance, false);
2958 vinfo->slp_instances.ordered_remove (i);
2959 cost_vec.release ();
2961 else
2963 for (hash_set<slp_tree>::iterator x = lvisited.begin();
2964 x != lvisited.end(); ++x)
2965 visited.add (*x);
2966 i++;
2968 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2969 cost_vec.release ();
2973 return !vinfo->slp_instances.is_empty ();
2977 /* Compute the scalar cost of the SLP node NODE and its children
2978 and return it. Do not account defs that are marked in LIFE and
2979 update LIFE according to uses of NODE. */
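/* For example, a scalar stmt in NODE whose def is also used outside
   the SLP instance has to stay live in the scalar code, so neither it
   nor the scalar defs it requires are counted in the scalar cost.  */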
2981 static void
2982 vect_bb_slp_scalar_cost (basic_block bb,
2983 slp_tree node, vec<bool, va_heap> *life,
2984 stmt_vector_for_cost *cost_vec,
2985 hash_set<slp_tree> &visited)
2987 unsigned i;
2988 stmt_vec_info stmt_info;
2989 slp_tree child;
2991 if (visited.add (node))
2992 return;
2994 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2996 gimple *stmt = stmt_info->stmt;
2997 vec_info *vinfo = stmt_info->vinfo;
2998 ssa_op_iter op_iter;
2999 def_operand_p def_p;
3001 if ((*life)[i])
3002 continue;
3004 /* If there is a non-vectorized use of the defs then the scalar
3005 stmt is kept live in which case we do not account it or any
3006 required defs in the SLP children in the scalar cost. This
3007 way we make the vectorization more costly when compared to
3008 the scalar cost. */
3009 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
3011 imm_use_iterator use_iter;
3012 gimple *use_stmt;
3013 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
3014 if (!is_gimple_debug (use_stmt))
3016 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
3017 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
3019 (*life)[i] = true;
3020 BREAK_FROM_IMM_USE_STMT (use_iter);
3024 if ((*life)[i])
3025 continue;
3027 /* Count scalar stmts only once. */
3028 if (gimple_visited_p (stmt))
3029 continue;
3030 gimple_set_visited (stmt, true);
3032 vect_cost_for_stmt kind;
3033 if (STMT_VINFO_DATA_REF (stmt_info))
3035 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
3036 kind = scalar_load;
3037 else
3038 kind = scalar_store;
3040 else if (vect_nop_conversion_p (stmt_info))
3041 continue;
3042 else
3043 kind = scalar_stmt;
3044 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
3047 auto_vec<bool, 20> subtree_life;
3048 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3050 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3052 /* Do not directly pass LIFE to the recursive call, copy it to
3053 confine changes in the callee to the current child/subtree. */
3054 subtree_life.safe_splice (*life);
3055 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
3056 visited);
3057 subtree_life.truncate (0);
3062 /* Check if vectorization of the basic block is profitable. */
3064 static bool
3065 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
3067 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3068 slp_instance instance;
3069 int i;
3070 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
3071 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
3073 /* Calculate scalar cost. */
3074 stmt_vector_for_cost scalar_costs;
3075 scalar_costs.create (0);
3076 hash_set<slp_tree> visited;
3077 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3079 auto_vec<bool, 20> life;
3080 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
3081 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
3082 SLP_INSTANCE_TREE (instance),
3083 &life, &scalar_costs, visited);
3085 void *target_cost_data = init_cost (NULL);
3086 add_stmt_costs (target_cost_data, &scalar_costs);
3087 scalar_costs.release ();
3088 unsigned dummy;
3089 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
3090 destroy_cost_data (target_cost_data);
3092 /* Unset visited flag. */
3093 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
3094 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3095 gimple_set_visited (gsi_stmt (gsi), false);
3097 /* Complete the target-specific cost calculation. */
3098 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
3099 &vec_inside_cost, &vec_epilogue_cost);
3101 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
3103 if (dump_enabled_p ())
3105 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3106 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
3107 vec_inside_cost);
3108 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
3109 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
3110 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
3113 /* Vectorization is profitable if its cost is no more than the cost of
3114 the scalar version. Note that we err on the vector side for equal cost
3115 because the cost estimate is otherwise quite pessimistic (constant uses
3116 are free on the scalar side but cost a load on the vector side for
3117 example). */
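/* E.g., with a vector inside cost of 10, a vector outside cost of 2 and
   a scalar cost of 12, the two sides tie and we still vectorize.  */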
3118 if (vec_outside_cost + vec_inside_cost > scalar_cost)
3119 return false;
3121 return true;
3124 /* Find any vectorizable constructors and add them to the grouped_store
3125 array. */
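/* E.g., an assignment v = {a, b, c, d} that builds a four-element
   vector from four scalar SSA names is recorded as a grouped store so
   that it can seed an SLP instance.  */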
3127 static void
3128 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3130 gimple_stmt_iterator gsi;
3132 for (gsi = bb_vinfo->region_begin;
3133 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3135 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3136 if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3137 continue;
3139 tree rhs = gimple_assign_rhs1 (stmt);
3140 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3141 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3142 CONSTRUCTOR_NELTS (rhs))
3143 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3144 || uniform_vector_p (rhs))
3145 continue;
3147 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3148 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3152 /* Check if the region described by BB_VINFO can be vectorized, returning
3153 true if so. When returning false, set FATAL to true if the same failure
3154 would prevent vectorization at other vector sizes, false if it is still
3155 worth trying other sizes. N_STMTS is the number of statements in the
3156 region. */
3158 static bool
3159 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3161 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3163 slp_instance instance;
3164 int i;
3165 poly_uint64 min_vf = 2;
3167 /* The first group of checks is independent of the vector size. */
3168 fatal = true;
3170 /* Analyze the data references. */
3172 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3176 "not vectorized: unhandled data-ref in basic "
3177 "block.\n");
3178 return false;
3181 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "not vectorized: not enough data-refs in "
3186 "basic block.\n");
3187 return false;
3190 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3194 "not vectorized: unhandled data access in "
3195 "basic block.\n");
3196 return false;
3199 vect_slp_check_for_constructors (bb_vinfo);
3201 /* If there are no grouped stores in the region there is no need
3202 to continue with pattern recog as vect_analyze_slp will fail
3203 anyway. */
3204 if (bb_vinfo->grouped_stores.is_empty ())
3206 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3208 "not vectorized: no grouped stores in "
3209 "basic block.\n");
3210 return false;
3213 /* The rest of the analysis below depends on the vector size in some way. */
3214 fatal = false;
3216 vect_pattern_recog (bb_vinfo);
3218 /* Check the SLP opportunities in the basic block, analyze and build SLP
3219 trees. */
3220 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3222 if (dump_enabled_p ())
3224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3225 "Failed to SLP the basic block.\n");
3226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3227 "not vectorized: failed to find SLP opportunities "
3228 "in basic block.\n");
3230 return false;
3233 vect_record_base_alignments (bb_vinfo);
3235 /* Analyze and verify the alignment of data references and the
3236 dependence in the SLP instances. */
3237 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3239 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3240 || ! vect_slp_analyze_instance_dependence (instance))
3242 slp_tree node = SLP_INSTANCE_TREE (instance);
3243 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_NOTE, vect_location,
3246 "removing SLP instance operations starting from: %G",
3247 stmt_info->stmt);
3248 vect_free_slp_instance (instance, false);
3249 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3250 continue;
3253 /* Mark all the statements that we want to vectorize as pure SLP and
3254 relevant. */
3255 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3256 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3257 if (SLP_INSTANCE_ROOT_STMT (instance))
3258 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3260 i++;
3262 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3263 return false;
3265 if (!vect_slp_analyze_operations (bb_vinfo))
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3269 "not vectorized: bad operation in basic block.\n");
3270 return false;
3273 /* Cost model: check if the vectorization is worthwhile. */
3274 if (!unlimited_cost_model (NULL)
3275 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3279 "not vectorized: vectorization is not "
3280 "profitable.\n");
3281 return false;
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_NOTE, vect_location,
3286 "Basic block will be vectorized using SLP\n");
3287 return true;
3290 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3291 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3292 on success. The region has N_STMTS statements and has the datarefs
3293 given by DATAREFS. */
3295 static bool
3296 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3297 gimple_stmt_iterator region_end,
3298 vec<data_reference_p> datarefs,
3299 unsigned int n_stmts)
3301 bb_vec_info bb_vinfo;
3302 auto_vector_modes vector_modes;
3304 /* Autodetect first vector size we try. */
3305 machine_mode next_vector_mode = VOIDmode;
3306 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3307 unsigned int mode_i = 0;
3309 vec_info_shared shared;
3311 machine_mode autodetected_vector_mode = VOIDmode;
3312 while (1)
3314 bool vectorized = false;
3315 bool fatal = false;
3316 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3318 bool first_time_p = shared.datarefs.is_empty ();
3319 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3320 if (first_time_p)
3321 bb_vinfo->shared->save_datarefs ();
3322 else
3323 bb_vinfo->shared->check_datarefs ();
3324 bb_vinfo->vector_mode = next_vector_mode;
3326 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3327 && dbg_cnt (vect_slp))
3329 if (dump_enabled_p ())
3331 dump_printf_loc (MSG_NOTE, vect_location,
3332 "***** Analysis succeeded with vector mode"
3333 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3334 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3337 bb_vinfo->shared->check_datarefs ();
3338 vect_schedule_slp (bb_vinfo);
3340 unsigned HOST_WIDE_INT bytes;
3341 if (dump_enabled_p ())
3343 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3344 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3345 "basic block part vectorized using %wu byte "
3346 "vectors\n", bytes);
3347 else
3348 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3349 "basic block part vectorized using variable "
3350 "length vectors\n");
3353 vectorized = true;
3355 else
3357 if (dump_enabled_p ())
3358 dump_printf_loc (MSG_NOTE, vect_location,
3359 "***** Analysis failed with vector mode %s\n",
3360 GET_MODE_NAME (bb_vinfo->vector_mode));
3363 if (mode_i == 0)
3364 autodetected_vector_mode = bb_vinfo->vector_mode;
3366 if (!fatal)
3367 while (mode_i < vector_modes.length ()
3368 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_NOTE, vect_location,
3372 "***** The result for vector mode %s would"
3373 " be the same\n",
3374 GET_MODE_NAME (vector_modes[mode_i]));
3375 mode_i += 1;
3378 delete bb_vinfo;
3380 if (mode_i < vector_modes.length ()
3381 && VECTOR_MODE_P (autodetected_vector_mode)
3382 && (related_vector_mode (vector_modes[mode_i],
3383 GET_MODE_INNER (autodetected_vector_mode))
3384 == autodetected_vector_mode)
3385 && (related_vector_mode (autodetected_vector_mode,
3386 GET_MODE_INNER (vector_modes[mode_i]))
3387 == vector_modes[mode_i]))
3389 if (dump_enabled_p ())
3390 dump_printf_loc (MSG_NOTE, vect_location,
3391 "***** Skipping vector mode %s, which would"
3392 " repeat the analysis for %s\n",
3393 GET_MODE_NAME (vector_modes[mode_i]),
3394 GET_MODE_NAME (autodetected_vector_mode));
3395 mode_i += 1;
3398 if (vectorized
3399 || mode_i == vector_modes.length ()
3400 || autodetected_vector_mode == VOIDmode
3401 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3402 vector sizes will fail do not bother iterating. */
3403 || fatal)
3404 return vectorized;
3406 /* Try the next biggest vector size. */
3407 next_vector_mode = vector_modes[mode_i++];
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_NOTE, vect_location,
3410 "***** Re-trying analysis with vector mode %s\n",
3411 GET_MODE_NAME (next_vector_mode));
3415 /* Main entry for the BB vectorizer. Analyze and transform BB, returning
3416 true if anything in the basic-block was vectorized. */
3418 bool
3419 vect_slp_bb (basic_block bb)
3421 gimple_stmt_iterator gsi;
3422 bool any_vectorized = false;
3424 gsi = gsi_start_bb (bb);
3425 while (!gsi_end_p (gsi))
3427 gimple_stmt_iterator region_begin = gsi;
3428 vec<data_reference_p> datarefs = vNULL;
3429 int insns = 0;
3431 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3433 gimple *stmt = gsi_stmt (gsi);
3434 if (is_gimple_debug (stmt))
3435 continue;
3436 insns++;
3438 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3439 vect_location = stmt;
3441 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3442 break;
3445 /* Skip leading unhandled stmts. */
3446 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3448 gsi_next (&gsi);
3449 continue;
3452 gimple_stmt_iterator region_end = gsi;
3454 if (insns > param_slp_max_insns_in_bb)
3456 if (dump_enabled_p ())
3457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3458 "not vectorized: too many instructions in "
3459 "basic block.\n");
3461 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3462 any_vectorized = true;
3464 if (gsi_end_p (region_end))
3465 break;
3467 /* Skip the unhandled stmt. */
3468 gsi_next (&gsi);
3471 return any_vectorized;
3475 /* Return true if operand OP_NUM of STMT_VINFO requires a boolean vector. */
3477 static bool
3478 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
3480 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3481 tree op, vectype;
3482 enum vect_def_type dt;
3484 /* For comparisons and COND_EXPRs the type is chosen depending
3485 on the other, non-constant comparison operand. */
3486 if (TREE_CODE_CLASS (code) == tcc_comparison)
3488 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3489 op = gimple_assign_rhs1 (stmt);
3491 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3492 gcc_unreachable ();
3494 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3497 if (code == COND_EXPR)
3499 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3500 tree cond = gimple_assign_rhs1 (stmt);
3502 if (TREE_CODE (cond) == SSA_NAME)
3504 if (op_num > 0)
3505 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3506 op = cond;
3508 else
3510 if (op_num > 1)
3511 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3512 op = TREE_OPERAND (cond, 0);
3515 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3516 gcc_unreachable ();
3518 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3521 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3524 /* Build a variable-length vector in which the elements in ELTS are repeated
3525 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3526 RESULTS and add any new instructions to SEQ.
3528 The approach we use is:
3530 (1) Find a vector mode VM with integer elements of mode IM.
3532 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3533 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3534 from small vectors to IM.
3536 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3538 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3539 correct byte contents.
3541 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3543 We try to find the largest IM for which this sequence works, in order
3544 to cut down on the number of interleaves. */
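/* For example, with four duplicated inputs A, B, C and D, the first
   round of VEC_PERM_EXPRs interleaves A with C and B with D, and the
   second round interleaves those results, yielding vectors whose byte
   contents repeat the pattern A B C D.  */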
3546 void
3547 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3548 vec<tree> elts, unsigned int nresults,
3549 vec<tree> &results)
3551 unsigned int nelts = elts.length ();
3552 tree element_type = TREE_TYPE (vector_type);
3554 /* (1) Find a vector mode VM with integer elements of mode IM. */
3555 unsigned int nvectors = 1;
3556 tree new_vector_type;
3557 tree permutes[2];
3558 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3559 &nvectors, &new_vector_type,
3560 permutes))
3561 gcc_unreachable ();
3563 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3564 unsigned int partial_nelts = nelts / nvectors;
3565 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3567 tree_vector_builder partial_elts;
3568 auto_vec<tree, 32> pieces (nvectors * 2);
3569 pieces.quick_grow (nvectors * 2);
3570 for (unsigned int i = 0; i < nvectors; ++i)
3572 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3573 ELTS' has mode IM. */
3574 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3575 for (unsigned int j = 0; j < partial_nelts; ++j)
3576 partial_elts.quick_push (elts[i * partial_nelts + j]);
3577 tree t = gimple_build_vector (seq, &partial_elts);
3578 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3579 TREE_TYPE (new_vector_type), t);
3581 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3582 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3585 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3586 correct byte contents.
3588 We need to repeat the following operation log2(nvectors) times:
3590 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3591 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3593 However, if each input repeats every N elements and the VF is
3594 a multiple of N * 2, the HI result is the same as the LO. */
3595 unsigned int in_start = 0;
3596 unsigned int out_start = nvectors;
3597 unsigned int hi_start = nvectors / 2;
3598 /* A bound on the number of outputs needed to produce NRESULTS results
3599 in the final iteration. */
3600 unsigned int noutputs_bound = nvectors * nresults;
3601 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3603 noutputs_bound /= 2;
3604 unsigned int limit = MIN (noutputs_bound, nvectors);
3605 for (unsigned int i = 0; i < limit; ++i)
3607 if ((i & 1) != 0
3608 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3609 2 * in_repeat))
3611 pieces[out_start + i] = pieces[out_start + i - 1];
3612 continue;
3615 tree output = make_ssa_name (new_vector_type);
3616 tree input1 = pieces[in_start + (i / 2)];
3617 tree input2 = pieces[in_start + (i / 2) + hi_start];
3618 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3619 input1, input2,
3620 permutes[i & 1]);
3621 gimple_seq_add_stmt (seq, stmt);
3622 pieces[out_start + i] = output;
3624 std::swap (in_start, out_start);
3627 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3628 results.reserve (nresults);
3629 for (unsigned int i = 0; i < nresults; ++i)
3630 if (i < nvectors)
3631 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3632 pieces[in_start + i]));
3633 else
3634 results.quick_push (results[i - nvectors]);
3638 /* For constant and loop invariant defs of SLP_NODE this function returns
3639 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3640 OP_NUM selects the child node of SLP_NODE that contains the scalar
3641 operands. */
3643 static void
3644 vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
3645 vec<tree> *vec_oprnds)
3647 slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
3648 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3649 vec_info *vinfo = stmt_vinfo->vinfo;
3650 unsigned HOST_WIDE_INT nunits;
3651 tree vec_cst;
3652 unsigned j, number_of_places_left_in_vector;
3653 tree vector_type;
3654 tree vop;
3655 int group_size = op_node->ops.length ();
3656 unsigned int vec_num, i;
3657 unsigned number_of_copies = 1;
3658 bool constant_p;
3659 tree neutral_op = NULL;
3660 gimple_seq ctor_seq = NULL;
3661 auto_vec<tree, 16> permute_results;
3663 /* ??? SLP analysis should compute the vector type for the
3664 constant / invariant and store it in the SLP node. */
3665 tree op = op_node->ops[0];
3666 /* Check if vector type is a boolean vector. */
3667 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3668 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3669 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3670 vector_type = truth_type_for (stmt_vectype);
3671 else
3672 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
3674 /* ??? For lane-reducing ops we should also have the required number
3675 of vector stmts initialized rather than second-guessing here. */
3676 unsigned int number_of_vectors;
3677 if (is_gimple_assign (stmt_vinfo->stmt)
3678 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3679 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3680 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3681 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3682 else
3683 number_of_vectors
3684 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3685 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3686 vector_type);
3687 vec_oprnds->create (number_of_vectors);
3688 auto_vec<tree> voprnds (number_of_vectors);
3690 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3691 created vectors. It is greater than 1 if unrolling is performed.
3693 For example, we have two scalar operands, s1 and s2 (e.g., group of
3694 strided accesses of size two), while NUNITS is four (i.e., four scalars
3695 of this type can be packed in a vector). The output vector will contain
3696 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3697 will be 2).
3699 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3700 containing the operands.
3702 For example, NUNITS is four as before, and the group size is 8
3703 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3704 {s5, s6, s7, s8}. */
3706 /* When using duplicate_and_interleave, we just need one element for
3707 each scalar statement. */
3708 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3709 nunits = group_size;
3711 number_of_copies = nunits * number_of_vectors / group_size;
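  /* E.g., in the first example above NUMBER_OF_COPIES is 4 * 1 / 2 == 2,
     and in the second it is 4 * 2 / 8 == 1 (one use of each scalar per
     vector).  */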
  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
        {
          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p
                  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
                  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
                vec_cst = gimple_build_vector (&ctor_seq, &elts);
              else
                {
                  if (permute_results.is_empty ())
                    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
                                              elts, number_of_vectors,
                                              permute_results);
                  vec_cst = permute_results[number_of_vectors - j - 1];
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  stmt_vec_info last_stmt_info
                    = vect_find_last_scalar_stmt_in_slp (slp_node);
                  gsi = gsi_for_stmt (last_stmt_info->stmt);
                  init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
                                           &gsi);
                }
              else
                init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
                                         NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
              elts.new_vector (vector_type, nunits, 1);
              elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
/* Get the vectorized definitions of SLP_NODE, whose vectorized def-stmts
   must already have been created.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  stmt_vec_info vec_def_stmt_info;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
    vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
}
/* Get N vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */
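/* A rough caller-side sketch (hypothetical, for illustration only):

     vec<vec<tree> > vec_defs;
     vec_defs.create (SLP_TREE_CHILDREN (node).length ());
     vect_get_slp_defs (node, &vec_defs, -1U);

   after which vec_defs[I] holds one vectorized definition per vector
   statement for operand I; the caller eventually releases each inner
   vector.  */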
void
vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
  if (n == -1U)
    n = SLP_TREE_CHILDREN (slp_node).length ();

  for (unsigned i = 0; i < n; ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];

      vec<tree> vec_defs = vNULL;

      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  */
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
          vect_get_slp_vect_defs (child, &vec_defs);
        }
      else
        vect_get_constant_vectors (slp_node, i, &vec_defs);

      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, poly_uint64 vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  vec_info *vinfo = stmt_info->vinfo;
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  unsigned int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  vec_perm_builder mask;
  unsigned int nelts_to_build;
  unsigned int nvectors_per_build;
  bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
                      && multiple_p (nunits, group_size));
  if (repeating_p)
    {
      /* A single vector contains a whole number of copies of the node, so:
         (a) all permutes can use the same mask; and
         (b) the permutes only need a single vector input.  */
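      /* For example (an illustration): with a group of two loads and
         8-element vectors, each vector holds four copies of the group,
         and MASK below is encoded as GROUP_SIZE interleaved patterns of
         three elements each -- enough for the vec_perm_builder to
         extrapolate the stepped mask series to any (possibly variable)
         vector length.  */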
      mask.new_vector (nunits, group_size, 3);
      nelts_to_build = mask.encoded_nelts ();
      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    }
  else
    {
      /* We need to construct a separate mask for each vector statement.  */
      unsigned HOST_WIDE_INT const_nunits, const_vf;
      if (!nunits.is_constant (&const_nunits)
          || !vf.is_constant (&const_vf))
        return false;
      mask.new_vector (const_nunits, const_nunits, 1);
      nelts_to_build = const_vf * group_size;
      nvectors_per_build = 1;
    }

  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;

  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
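      /* I is the position of this element in the interleaved sequence in
         memory: element LOAD_PERMUTATION[STMT_NUM] of copy ITER_NUM of
         the DR group.  */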
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
                        + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
      if (repeating_p)
        {
          first_vec_index = 0;
          mask_element = i;
        }
      else
        {
          /* Enforced before the loop when !repeating_p.  */
          unsigned int const_nunits = nunits.to_constant ();
          vec_index = i / const_nunits;
          mask_element = i % const_nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += const_nunits;
            }
          else
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "permutation requires at "
                                 "least three vectors %G",
                                 stmt_info->stmt);
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element < 2 * const_nunits);
        }

      if (mask_element != index)
        noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
        {
          indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
          if (!can_vec_perm_const_p (mode, indices))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported vect permute { ");
                  for (unsigned int k = 0; k < count; ++k)
                    {
                      dump_dec (MSG_MISSED_OPTIMIZATION, mask[k]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
                    }
                  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                }
              gcc_assert (analyze_only);
              return false;
            }

          ++*n_perms;
        }

      if (index == count)
        {
          if (!analyze_only)
            {
              tree mask_vec = NULL_TREE;

              if (! noop_p)
                mask_vec = vect_gen_perm_mask_checked (vectype, indices);

              if (second_vec_index == -1)
                second_vec_index = first_vec_index;

              for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
                {
                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index + ri];
                  tree second_vec = dr_chain[second_vec_index + ri];
                  stmt_vec_info perm_stmt_info;
                  if (! noop_p)
                    {
                      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      gassign *perm_stmt
                        = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                               first_vec, second_vec,
                                               mask_vec);
                      perm_stmt_info
                        = vect_finish_stmt_generation (stmt_info, perm_stmt,
                                                       gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt_info = vinfo->lookup_def (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
                    = perm_stmt_info;
                }
            }

          index = 0;
          first_vec_index = -1;
          second_vec_index = -1;
          noop_p = true;
        }
    }

  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (slp_tree node, slp_instance instance)
{
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if (SLP_TREE_VEC_STMTS (node).exists ())
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "------>vectorizing SLP node starting from: %G",
                     stmt_info->stmt);

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
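  /* For example (an illustration): for the two-statement group
       a[0] = b[0] + c[0];  a[1] = b[1] - c[1];
     with, say, V2DF vectors we vectorize the whole group once as
     PLUS_EXPR and once as MINUS_EXPR and blend the results with
     VEC_PERM_EXPR <vplus, vminus, { 0, 3 }>, taking lane 0 from the
     PLUS vector and lane 1 from the MINUS vector.  */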
  bool done_p = false;
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
        {
          gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
          if (gimple_assign_rhs_code (ostmt) != code0)
            {
              mask.quick_push (1);
              ocode = gimple_assign_rhs_code (ostmt);
            }
          else
            mask.quick_push (0);
        }
      if (ocode != ERROR_MARK)
        {
          vec<stmt_vec_info> v0;
          vec<stmt_vec_info> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt_info, &si, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt_info, &si, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
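          /* Build an integer vector type with the same size and number
             of elements as VECTYPE to hold the constant merge mask,
             giving the mask elements the same width as the data
             elements.  */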
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ???  Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets'
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]->stmt),
                                           gimple_assign_lhs (v1[j]->stmt),
                                           tmask);
              SLP_TREE_VEC_STMTS (node).quick_push
                (vect_finish_stmt_generation (stmt_info, vstmt, &si));
            }
          v0.release ();
          v1.release ();
          done_p = true;
        }
    }
  if (!done_p)
    vect_transform_stmt (stmt_info, &si, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }
}
/* Replace the scalar calls in SLP node NODE by setting their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
        continue;
      if (is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (node, visited);
}
/* Vectorize the instance root.  */
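/* The instance root is (as of this code) an assignment whose rhs is a
   vector CONSTRUCTOR recognized by BB vectorization, e.g.
   v = {a0, a1, a2, a3}: below we either forward the lhs of the single
   covering vector stmt (through a VIEW_CONVERT_EXPR if the types
   disagree) or build a new CONSTRUCTOR of the vectorized parts and
   assign it to the root's lhs.  */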
void
vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
{
  gassign *rstmt = NULL;

  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    {
      stmt_vec_info child_stmt_info;
      int j;

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
        {
          tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
          tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
          if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
                                          TREE_TYPE (vect_lhs)))
            vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
                               vect_lhs);
          rstmt = gimple_build_assign (root_lhs, vect_lhs);
          break;
        }
    }
  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    {
      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      stmt_vec_info child_stmt_info;
      int j;
      vec<constructor_elt, va_gc> *v;
      vec_alloc (v, nelts);

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
        {
          CONSTRUCTOR_APPEND_ELT (v,
                                  NULL_TREE,
                                  gimple_get_lhs (child_stmt_info->stmt));
        }
      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
      tree r_constructor = build_constructor (rtype, v);
      rstmt = gimple_build_assign (lhs, r_constructor);
    }

  gcc_assert (rstmt);

  gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
  gsi_replace (&rgsi, rstmt, true);
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (node, instance);

      if (SLP_INSTANCE_ROOT_STMT (instance))
        vectorize_slp_instance_root_stmt (node, instance);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we could just as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (store_info))
            break;

          if (SLP_INSTANCE_ROOT_STMT (instance))
            continue;

          store_info = vect_orig_stmt (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);