Invoke vectorizable_live_operation in a consistent way
[official-gcc.git] / gcc / tree-vect-slp.c
blob 32174fe6bb02f187897553ecde73b4a07c4432b3
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
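/* An illustrative overview (not part of the original file): basic-block
   SLP looks for groups of isomorphic scalar statements and packs them
   into vector operations without requiring a loop, e.g.

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   can become a single vector load, add and store.  The routines below
   build and analyze the SLP tree that makes such groupings explicit.  */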
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
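/* For example (an illustration, not from the original file): for the
   group { a0 = b0 + c0; a1 = b1 + c1; } there are two of these
   structures; oprnds_info[0]->def_stmts holds the definitions of b0 and
   b1, while oprnds_info[1]->def_stmts holds those of c0 and c1, with
   first_dt and first_op_type recording what was seen for the first
   stmt of the group.  */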
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
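/* For example (an illustration): if FIRST_STMT heads the chain
   s1 -> s2 -> s3 and each element follows its predecessor with a
   GROUP_GAP of 1, the returned place of s1 is 0, of s2 is 1 and of
   s3 is 2; larger gaps in the chain increase the index accordingly.  */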
static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  A non-zero *SWAP on entry
   indicates a swap is required for cond_expr stmts: *SWAP is 1 if STMT is a
   cond and the operands of the comparison need to be swapped; *SWAP is 2 if
   STMT is a cond and the code of the comparison needs to be inverted.  If
   this function performs any operand swap, *SWAP is set to a non-zero value
   on return.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */
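/* A sketch of the swapping attempted below (illustrative only): if the
   first stmt of the group is a0 = x + y and matching operand zero of a
   later stmt a1 = z + w against it fails, then for a commutative code
   such as PLUS_EXPR the later stmt's operands are swapped and the match
   is retried via the "again" label.  */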
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
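          /* The rows of MAPS, illustrating the scheme documented above:
             row 0 is the identity, row 1 ({1, 0, 2, 3}) swaps the two
             operands of the comparison (*swap == 1), and row 2
             ({0, 1, 3, 2}) swaps the two arms of the COND_EXPR
             (*swap == 2, which goes together with inverting the
             comparison code when the swap is applied further down).  */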
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of its comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of the comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
   swapped to (B1 <= A1 ? X1 : Y1) or inverted to (A1 < B1) ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)
        {
          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            elt += count;
          sel.quick_push (elt);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

static hash_set <vec <gimple *>, bst_traits> *bst_fail;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return the built SLP tree; *MATCHES indicates which stmts did not
   match the first stmt of the group.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }
  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }
      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }
      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
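/* For example (an illustration): if every load node of a reduction
   instance uses the permutation { 1, 0, 3, 2 }, the scalar stmts of all
   nodes can be reordered the same way and the load permutations dropped
   entirely, because a reduction does not care in which order its
   elements are accumulated.  */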
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */
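/* For example (an illustration): in basic-block SLP, loads of b[1] and
   b[2] taken from an interleaving chain starting at b[0] form a subchain
   and need no permutation, whereas loading b[1] before b[0] requires a
   permutation the target must be able to generate.  */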
static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  unsigned n_perms;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
               &n_perms))
      return false;

  return true;
}
/* Find the last (latest in the basic block) scalar stmt in SLP node NODE,
   considering the original stmt position for pattern stmts.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)
           ? VMAT_STRIDED_SLP
           : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            {
              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              unsigned n_perms;
              vect_transform_slp_perm_load (node, vNULL, NULL,
                                            ncopies_for_cost, instance, true,
                                            &n_perms);
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
              bitmap_clear (perm);
              for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
                bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
              ncopies_for_cost = 0;
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
                {
                  if (i % nunits == 0)
                    {
                      if (load_seen)
                        ncopies_for_cost++;
                      load_seen = false;
                    }
                  if (bitmap_bit_p (perm, i))
                    load_seen = true;
                }
              if (load_seen)
                ncopies_for_cost++;
              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                              + nunits - 1) / nunits);
              ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
            }
          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,
                                body_cost_vec);
          return;
        }
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
                             (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
         to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
        {
        case DOT_PROD_EXPR:
        case SAD_EXPR:
          nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
                                           (TREE_TYPE (gimple_assign_rhs1 (stmt))));
          break;
        default:;
        }
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
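  /* For example (illustrative numbers, not from the original file): with
     an adjusted group_size of 6 and nunits of 4 this computes
     least_common_multiple (4, 6) / 4 = 12 / 4 = 3 vector stmts to cover
     all scalar stmts of the (possibly unrolled) group.  */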
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,
                           ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */
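/* For example (an illustration): splitting a group of 8 consecutive
   stores with GROUP1_SIZE 4 and no gap before the group yields two
   groups of 4; the second group's GROUP_GAP becomes 0 + 4 (it skips
   over the first group) and the first group's GROUP_GAP grows by 4,
   skipping over the second group.  */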
1877 static gimple *
1878 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1880 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1881 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1882 gcc_assert (group1_size > 0);
1883 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1884 gcc_assert (group2_size > 0);
1885 GROUP_SIZE (first_vinfo) = group1_size;
1887 gimple *stmt = first_stmt;
1888 for (unsigned i = group1_size; i > 1; i--)
1890 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1891 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1893 /* STMT is now the last element of the first group. */
1894 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1895 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1897 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1898 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1900 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1901 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1904 /* For the second group, the GROUP_GAP is that before the original group,
1905 plus skipping over the first group. */
1906 GROUP_GAP (vinfo_for_stmt (group2)) =
1907 GROUP_GAP (first_vinfo) + group1_size;
1909 /* GROUP_GAP of the first group now has to skip over the second group too. */
1910 GROUP_GAP (first_vinfo) += group2_size;
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1914 group1_size, group2_size);
1916 return group2;
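/* [Editor's illustration, not part of this file]  A self-check of the gap
   bookkeeping above, assuming GROUP_GAP of a group head counts elements
   skipped ahead of the group: after the split, both subgroups still
   advance by the original GAP + SIZE per iteration.  */
#include <assert.h>

int
main (void)
{
  unsigned gap = 1, size1 = 4, size2 = 3;   /* original gap, split sizes */
  unsigned gap1 = gap + size2;              /* first group skips the second */
  unsigned gap2 = gap + size1;              /* second group skips the first */
  assert (gap1 + size1 == gap + size1 + size2);
  assert (gap2 + size2 == gap + size1 + size2);
  return 0;
}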
1919 /* Analyze an SLP instance starting from a group of grouped stores. Call
1920 vect_build_slp_tree to build a tree of packed stmts if possible.
1921 Return FALSE if it's impossible to SLP any stmt in the loop. */
1923 static bool
1924 vect_analyze_slp_instance (vec_info *vinfo,
1925 gimple *stmt, unsigned max_tree_size)
1927 slp_instance new_instance;
1928 slp_tree node;
1929 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1930 unsigned int unrolling_factor = 1, nunits;
1931 tree vectype, scalar_type = NULL_TREE;
1932 gimple *next;
1933 unsigned int i;
1934 unsigned int max_nunits = 0;
1935 vec<slp_tree> loads;
1936 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1937 vec<gimple *> scalar_stmts;
1939 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1941 if (dr)
1943 scalar_type = TREE_TYPE (DR_REF (dr));
1944 vectype = get_vectype_for_scalar_type (scalar_type);
1946 else
1948 gcc_assert (is_a <loop_vec_info> (vinfo));
1949 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1952 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1954 else
1956 gcc_assert (is_a <loop_vec_info> (vinfo));
1957 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1958 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1961 if (!vectype)
1963 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "Build SLP failed: unsupported data-type ");
1967 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1968 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1971 return false;
1973 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1975 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1976 scalar_stmts.create (group_size);
1977 next = stmt;
1978 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1980 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1981 while (next)
1983 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1984 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1985 scalar_stmts.safe_push (
1986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1987 else
1988 scalar_stmts.safe_push (next);
1989 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1991 /* Mark the first element of the reduction chain as reduction to properly
1992 transform the node. In the reduction analysis phase only the last
1993 element of the chain is marked as reduction. */
1994 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1995 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1997 else
1999 /* Collect reduction statements. */
2000 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2001 for (i = 0; reductions.iterate (i, &next); i++)
2002 scalar_stmts.safe_push (next);
2005 loads.create (group_size);
2007 /* Build the tree for the SLP instance. */
2008 bool *matches = XALLOCAVEC (bool, group_size);
2009 unsigned npermutes = 0;
2010 bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
2011 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2012 &max_nunits, &loads, matches, &npermutes,
2013 NULL, max_tree_size);
2014 delete bst_fail;
2015 if (node != NULL)
2017 /* Calculate the unrolling factor based on the smallest type. */
2018 unrolling_factor
2019 = least_common_multiple (max_nunits, group_size) / group_size;
2021 if (unrolling_factor != 1
2022 && is_a <bb_vec_info> (vinfo))
2025 if (max_nunits > group_size)
2027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2028 "Build SLP failed: store group "
2029 "size not a multiple of the vector size "
2030 "in basic block SLP\n");
2031 vect_free_slp_tree (node);
2032 loads.release ();
2033 return false;
2035 /* Fatal mismatch. */
2036 matches[group_size/max_nunits * max_nunits] = false;
2037 vect_free_slp_tree (node);
2038 loads.release ();
2040 else
2042 /* Create a new SLP instance. */
2043 new_instance = XNEW (struct _slp_instance);
2044 SLP_INSTANCE_TREE (new_instance) = node;
2045 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2046 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2047 SLP_INSTANCE_LOADS (new_instance) = loads;
2049 /* Compute the load permutation. */
2050 slp_tree load_node;
2051 bool loads_permuted = false;
2052 FOR_EACH_VEC_ELT (loads, i, load_node)
2054 vec<unsigned> load_permutation;
2055 int j;
2056 gimple *load, *first_stmt;
2057 bool this_load_permuted = false;
2058 load_permutation.create (group_size);
2059 first_stmt = GROUP_FIRST_ELEMENT
2060 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2061 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2063 int load_place = vect_get_place_in_interleaving_chain
2064 (load, first_stmt);
2065 gcc_assert (load_place != -1);
2066 if (load_place != j)
2067 this_load_permuted = true;
2068 load_permutation.safe_push (load_place);
2070 if (!this_load_permuted
2071 /* The load requires permutation when unrolling exposes
2072 a gap either because the group is larger than the SLP
2073 group-size or because there is a gap between the groups. */
2074 && (unrolling_factor == 1
2075 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2076 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2078 load_permutation.release ();
2079 continue;
2081 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2082 loads_permuted = true;
2085 if (loads_permuted)
2087 if (!vect_supported_load_permutation_p (new_instance))
2089 if (dump_enabled_p ())
2091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2092 "Build SLP failed: unsupported load "
2093 "permutation ");
2094 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2095 TDF_SLIM, stmt, 0);
2097 vect_free_slp_instance (new_instance);
2098 return false;
2102 /* If the loads and stores can be handled with load/store-lanes
2103 instructions, do not generate this SLP instance. */
2104 if (is_a <loop_vec_info> (vinfo)
2105 && loads_permuted
2106 && dr && vect_store_lanes_supported (vectype, group_size))
2108 slp_tree load_node;
2109 FOR_EACH_VEC_ELT (loads, i, load_node)
2111 gimple *first_stmt = GROUP_FIRST_ELEMENT
2112 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2113 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2114 /* Use SLP for strided accesses (or if we
2115 can't load-lanes). */
2116 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2117 || ! vect_load_lanes_supported
2118 (STMT_VINFO_VECTYPE (stmt_vinfo),
2119 GROUP_SIZE (stmt_vinfo)))
2120 break;
2122 if (i == loads.length ())
2124 if (dump_enabled_p ())
2125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2126 "Built SLP cancelled: can use "
2127 "load/store-lanes\n");
2128 vect_free_slp_instance (new_instance);
2129 return false;
2133 vinfo->slp_instances.safe_push (new_instance);
2135 if (dump_enabled_p ())
2137 dump_printf_loc (MSG_NOTE, vect_location,
2138 "Final SLP tree for instance:\n");
2139 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2142 return true;
2145 else
2147 /* Failed to SLP. */
2148 /* Free the allocated memory. */
2149 scalar_stmts.release ();
2150 loads.release ();
2153 /* For basic block SLP, try to break the group up into multiples of the
2154 vector size. */
2155 if (is_a <bb_vec_info> (vinfo)
2156 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2157 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2159 /* We consider breaking the group only on VF boundaries from the existing
2160 start. */
2161 for (i = 0; i < group_size; i++)
2162 if (!matches[i]) break;
2164 if (i >= nunits && i < group_size)
2166 /* Split into two groups at the first vector boundary before i. */
2167 gcc_assert ((nunits & (nunits - 1)) == 0);
2168 unsigned group1_size = i & ~(nunits - 1);
2170 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2171 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2172 /* If the first non-match was in the middle of a vector,
2173 skip the rest of that vector. */
2174 if (group1_size < i)
2176 i = group1_size + nunits;
2177 if (i < group_size)
2178 rest = vect_split_slp_store_group (rest, nunits);
2180 if (i < group_size)
2181 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2182 return res;
2184 /* Even though the first vector did not all match, we might be able to SLP
2185 (some) of the remainder. FORNOW ignore this possibility. */
2188 return false;
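/* [Editor's illustration, not part of this file]  The split point above is
   the first mismatching stmt rounded down to a vector boundary, relying on
   NUNITS being a power of two (asserted above).  */
#include <stdio.h>

int
main (void)
{
  unsigned nunits = 4;
  for (unsigned i = 4; i < 8; i++)
    printf ("first mismatch at %u -> group1_size %u\n", i, i & ~(nunits - 1));
  /* All of i = 4..7 round down to a group1_size of 4.  */
  return 0;
}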
2192 /* Check if there are stmts in the loop that can be vectorized using SLP.
2193 Build SLP trees of packed scalar stmts if SLP is possible. */
2195 bool
2196 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2198 unsigned int i;
2199 gimple *first_element;
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2204 /* Find SLP sequences starting from groups of grouped stores. */
2205 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2206 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2208 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2210 if (loop_vinfo->reduction_chains.length () > 0)
2212 /* Find SLP sequences starting from reduction chains. */
2213 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2214 if (! vect_analyze_slp_instance (vinfo, first_element,
2215 max_tree_size))
2217 /* Dissolve reduction chain group. */
2218 gimple *next, *stmt = first_element;
2219 while (stmt)
2221 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2222 next = GROUP_NEXT_ELEMENT (vinfo);
2223 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2224 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2225 stmt = next;
2227 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2228 = vect_internal_def;
2232 /* Find SLP sequences starting from groups of reductions. */
2233 if (loop_vinfo->reductions.length () > 1)
2234 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2235 max_tree_size);
2238 return true;
2242 /* For each possible SLP instance decide whether to SLP it and calculate the
2243 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2244 to SLP at least one instance. */
2246 bool
2247 vect_make_slp_decision (loop_vec_info loop_vinfo)
2249 unsigned int i, unrolling_factor = 1;
2250 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2251 slp_instance instance;
2252 int decided_to_slp = 0;
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2256 "\n");
2258 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2260 /* FORNOW: SLP if you can. */
2261 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2262 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2264 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2265 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2266 loop-based vectorization. Such stmts will be marked as HYBRID. */
2267 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2268 decided_to_slp++;
2271 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2273 if (decided_to_slp && dump_enabled_p ())
2274 dump_printf_loc (MSG_NOTE, vect_location,
2275 "Decided to SLP %d instances. Unrolling factor %d\n",
2276 decided_to_slp, unrolling_factor);
2278 return (decided_to_slp > 0);
2282 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2283 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2285 static void
2286 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2288 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2289 imm_use_iterator imm_iter;
2290 gimple *use_stmt;
2291 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2292 slp_tree child;
2293 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2295 int j;
2297 /* Propagate hybrid down the SLP tree. */
2298 if (stype == hybrid)
2300 else if (HYBRID_SLP_STMT (stmt_vinfo))
2301 stype = hybrid;
2302 else
2304 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2305 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2306 /* If we get a pattern stmt here we have to use the LHS of the
2307 original stmt for immediate uses. */
2308 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2309 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2310 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2311 tree def;
2312 if (gimple_code (stmt) == GIMPLE_PHI)
2313 def = gimple_phi_result (stmt);
2314 else
2315 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2316 if (def)
2317 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2319 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2320 continue;
2321 use_vinfo = vinfo_for_stmt (use_stmt);
2322 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2323 && STMT_VINFO_RELATED_STMT (use_vinfo))
2324 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2325 if (!STMT_SLP_TYPE (use_vinfo)
2326 && (STMT_VINFO_RELEVANT (use_vinfo)
2327 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2328 && !(gimple_code (use_stmt) == GIMPLE_PHI
2329 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2331 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2334 "def in non-SLP stmt: ");
2335 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2337 stype = hybrid;
2342 if (stype == hybrid
2343 && !HYBRID_SLP_STMT (stmt_vinfo))
2345 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2348 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2350 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2353 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2354 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2355 vect_detect_hybrid_slp_stmts (child, i, stype);
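/* [Editor's note]  Hybrid here means vectorized both as part of an SLP
   instance and by the loop vectorizer.  E.g. if

     _1 = a[i_1] + b[i_1];    <-- pure SLP (feeds an SLP store group)
     sum_3 = _1 + sum_2;      <-- non-SLP reduction

   then the def _1 is also needed by the non-SLP reduction stmt, so the
   stmt defining _1 is marked hybrid by the walk above.  */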
2358 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2360 static tree
2361 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2363 walk_stmt_info *wi = (walk_stmt_info *)data;
2364 struct loop *loopp = (struct loop *)wi->info;
2366 if (wi->is_lhs)
2367 return NULL_TREE;
2369 if (TREE_CODE (*tp) == SSA_NAME
2370 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2372 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2373 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2374 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2376 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2379 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2381 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2385 return NULL_TREE;
2388 static tree
2389 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2390 walk_stmt_info *)
2392 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2393 /* If the stmt is in a SLP instance then this isn't a reason
2394 to mark use definitions in other SLP instances as hybrid. */
2395 if (! STMT_SLP_TYPE (use_vinfo)
2396 && (STMT_VINFO_RELEVANT (use_vinfo)
2397 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2398 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2399 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2401 else
2402 *handled = true;
2403 return NULL_TREE;
2406 /* Find stmts that must be both vectorized and SLPed. */
2408 void
2409 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2411 unsigned int i;
2412 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2413 slp_instance instance;
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2417 "\n");
2419 /* First walk all pattern stmts in the loop and mark defs of uses as
2420 hybrid because immediate uses in them are not recorded. */
2421 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2423 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2424 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2425 gsi_next (&gsi))
2427 gimple *stmt = gsi_stmt (gsi);
2428 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2429 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2431 walk_stmt_info wi;
2432 memset (&wi, 0, sizeof (wi));
2433 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2434 gimple_stmt_iterator gsi2
2435 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2436 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2437 vect_detect_hybrid_slp_1, &wi);
2438 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2439 vect_detect_hybrid_slp_2,
2440 vect_detect_hybrid_slp_1, &wi);
2445 /* Then walk the SLP instance trees marking stmts with uses in
2446 non-SLP stmts as hybrid, also propagating hybrid down the
2447 SLP tree, collecting the above info on-the-fly. */
2448 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2450 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2451 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2452 i, pure_slp);
2457 /* Initialize a bb_vec_info struct for the statements between
2458 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2460 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2461 gimple_stmt_iterator region_end_in)
2462 : vec_info (vec_info::bb, init_cost (NULL)),
2463 bb (gsi_bb (region_begin_in)),
2464 region_begin (region_begin_in),
2465 region_end (region_end_in)
2467 gimple_stmt_iterator gsi;
2469 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2470 gsi_next (&gsi))
2472 gimple *stmt = gsi_stmt (gsi);
2473 gimple_set_uid (stmt, 0);
2474 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2477 bb->aux = this;
2481 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2482 stmts in the basic block. */
2484 _bb_vec_info::~_bb_vec_info ()
2486 for (gimple_stmt_iterator si = region_begin;
2487 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2489 gimple *stmt = gsi_stmt (si);
2490 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2492 if (stmt_info)
2493 /* Free stmt_vec_info. */
2494 free_stmt_vec_info (stmt);
2496 /* Reset region marker. */
2497 gimple_set_uid (stmt, -1);
2500 bb->aux = NULL;
2504 /* Analyze statements contained in SLP tree node after recursively analyzing
2505 the subtree. Return TRUE if the operations are supported. */
2507 static bool
2508 vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance)
2510 bool dummy;
2511 int i, j;
2512 gimple *stmt;
2513 slp_tree child;
2515 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2516 return true;
2518 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2519 if (!vect_slp_analyze_node_operations (child, node_instance))
2520 return false;
2522 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2524 gcc_assert (stmt_info);
2525 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2527 /* For BB vectorization vector types are assigned here.
2528 Memory accesses already got their vector type assigned
2529 in vect_analyze_data_refs. */
2530 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2531 if (bb_vinfo
2532 && ! STMT_VINFO_DATA_REF (stmt_info))
2534 gcc_assert (PURE_SLP_STMT (stmt_info));
2536 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2537 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_NOTE, vect_location,
2540 "get vectype for scalar type: ");
2541 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2542 dump_printf (MSG_NOTE, "\n");
2545 tree vectype = get_vectype_for_scalar_type (scalar_type);
2546 if (!vectype)
2548 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "not SLPed: unsupported data-type ");
2552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2553 scalar_type);
2554 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2556 return false;
2559 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2562 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2563 dump_printf (MSG_NOTE, "\n");
2566 gimple *sstmt;
2567 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2568 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2571 /* Push SLP node def-type to stmt operands. */
2572 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2573 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2574 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2575 = SLP_TREE_DEF_TYPE (child);
2576 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2577 /* Restore def-types. */
2578 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2579 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2580 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2581 = vect_internal_def;
2582 if (! res)
2583 return false;
2585 return true;
2589 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2590 operations are supported. */
2592 bool
2593 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2595 slp_instance instance;
2596 int i;
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_NOTE, vect_location,
2600 "=== vect_slp_analyze_operations ===\n");
2602 for (i = 0; slp_instances.iterate (i, &instance); )
2604 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance),
2605 instance))
2607 dump_printf_loc (MSG_NOTE, vect_location,
2608 "removing SLP instance operations starting from: ");
2609 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2610 SLP_TREE_SCALAR_STMTS
2611 (SLP_INSTANCE_TREE (instance))[0], 0);
2612 vect_free_slp_instance (instance);
2613 slp_instances.ordered_remove (i);
2615 else
2617 /* Compute the costs of the SLP instance. */
2618 vect_analyze_slp_cost (instance, data);
2619 i++;
2623 if (!slp_instances.length ())
2624 return false;
2626 return true;
2630 /* Compute the scalar cost of the SLP node NODE and its children
2631 and return it. Do not account defs that are marked in LIFE and
2632 update LIFE according to uses of NODE. */
2634 static unsigned
2635 vect_bb_slp_scalar_cost (basic_block bb,
2636 slp_tree node, vec<bool, va_heap> *life)
2638 unsigned scalar_cost = 0;
2639 unsigned i;
2640 gimple *stmt;
2641 slp_tree child;
2643 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2645 unsigned stmt_cost;
2646 ssa_op_iter op_iter;
2647 def_operand_p def_p;
2648 stmt_vec_info stmt_info;
2650 if ((*life)[i])
2651 continue;
2653 /* If there is a non-vectorized use of the defs then the scalar
2654 stmt is kept live in which case we do not account it or any
2655 required defs in the SLP children in the scalar cost. This
2656 way we make the vectorization more costly when compared to
2657 the scalar cost. */
2658 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2660 imm_use_iterator use_iter;
2661 gimple *use_stmt;
2662 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2663 if (!is_gimple_debug (use_stmt)
2664 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2665 use_stmt)
2666 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2668 (*life)[i] = true;
2669 BREAK_FROM_IMM_USE_STMT (use_iter);
2672 if ((*life)[i])
2673 continue;
2675 /* Count scalar stmts only once. */
2676 if (gimple_visited_p (stmt))
2677 continue;
2678 gimple_set_visited (stmt, true);
2680 stmt_info = vinfo_for_stmt (stmt);
2681 if (STMT_VINFO_DATA_REF (stmt_info))
2683 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2684 stmt_cost = vect_get_stmt_cost (scalar_load);
2685 else
2686 stmt_cost = vect_get_stmt_cost (scalar_store);
2688 else
2689 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2691 scalar_cost += stmt_cost;
2694 auto_vec<bool, 20> subtree_life;
2695 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2697 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2699 /* Do not directly pass LIFE to the recursive call, copy it to
2700 confine changes in the callee to the current child/subtree. */
2701 subtree_life.safe_splice (*life);
2702 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2703 subtree_life.truncate (0);
2707 return scalar_cost;
2710 /* Check if vectorization of the basic block is profitable. */
2712 static bool
2713 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2715 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2716 slp_instance instance;
2717 int i;
2718 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2719 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2721 /* Calculate scalar cost. */
2722 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2724 auto_vec<bool, 20> life;
2725 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2726 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2727 SLP_INSTANCE_TREE (instance),
2728 &life);
2731 /* Unset visited flag. */
2732 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2733 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2734 gimple_set_visited (gsi_stmt (gsi), false);
2736 /* Complete the target-specific cost calculation. */
2737 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2738 &vec_inside_cost, &vec_epilogue_cost);
2740 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2742 if (dump_enabled_p ())
2744 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2745 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2746 vec_inside_cost);
2747 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2748 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2749 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2752 /* Vectorization is profitable if its cost is not more than the cost of the
2753 scalar version. Note that we err on the vector side for equal cost because
2754 the cost estimate is otherwise quite pessimistic (constant uses are
2755 free on the scalar side but cost a load on the vector side for
2756 example). */
2757 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2758 return false;
2760 return true;
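/* [Editor's illustration, not part of this file]  The decision rule above
   in isolation, with made-up costs; equal cost still vectorizes.  */
#include <stdbool.h>
#include <stdio.h>

static bool
profitable_p (unsigned vec_outside, unsigned vec_inside, unsigned scalar)
{
  return !(vec_outside + vec_inside > scalar);
}

int
main (void)
{
  printf ("%d\n", profitable_p (2, 10, 12));   /* 1: equal cost */
  printf ("%d\n", profitable_p (2, 11, 12));   /* 0: vector side costlier */
  return 0;
}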
2763 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2764 if so and sets fatal to true if failure is independent of
2765 current_vector_size. */
2767 static bb_vec_info
2768 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2769 gimple_stmt_iterator region_end,
2770 vec<data_reference_p> datarefs, int n_stmts,
2771 bool &fatal)
2773 bb_vec_info bb_vinfo;
2774 slp_instance instance;
2775 int i;
2776 int min_vf = 2;
2778 /* The first group of checks is independent of the vector size. */
2779 fatal = true;
2781 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2785 "not vectorized: too many instructions in "
2786 "basic block.\n");
2787 free_data_refs (datarefs);
2788 return NULL;
2791 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2792 if (!bb_vinfo)
2793 return NULL;
2795 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2797 /* Analyze the data references. */
2799 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2801 if (dump_enabled_p ())
2802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2803 "not vectorized: unhandled data-ref in basic "
2804 "block.\n");
2806 delete bb_vinfo;
2807 return NULL;
2810 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2812 if (dump_enabled_p ())
2813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2814 "not vectorized: not enough data-refs in "
2815 "basic block.\n");
2817 delete bb_vinfo;
2818 return NULL;
2821 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2825 "not vectorized: unhandled data access in "
2826 "basic block.\n");
2828 delete bb_vinfo;
2829 return NULL;
2832 /* If there are no grouped stores in the region there is no need
2833 to continue with pattern recog as vect_analyze_slp will fail
2834 anyway. */
2835 if (bb_vinfo->grouped_stores.is_empty ())
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2839 "not vectorized: no grouped stores in "
2840 "basic block.\n");
2842 delete bb_vinfo;
2843 return NULL;
2846 /* The rest of the analysis below depends on the vector size in some way, so failures from here on are not fatal. */
2847 fatal = false;
2849 vect_pattern_recog (bb_vinfo);
2851 /* Check the SLP opportunities in the basic block, analyze and build SLP
2852 trees. */
2853 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2855 if (dump_enabled_p ())
2857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2858 "Failed to SLP the basic block.\n");
2859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 "not vectorized: failed to find SLP opportunities "
2861 "in basic block.\n");
2864 delete bb_vinfo;
2865 return NULL;
2868 vect_record_base_alignments (bb_vinfo);
2870 /* Analyze and verify the alignment of data references and the
2871 dependence in the SLP instances. */
2872 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2874 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2875 || ! vect_slp_analyze_instance_dependence (instance))
2877 dump_printf_loc (MSG_NOTE, vect_location,
2878 "removing SLP instance operations starting from: ");
2879 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2880 SLP_TREE_SCALAR_STMTS
2881 (SLP_INSTANCE_TREE (instance))[0], 0);
2882 vect_free_slp_instance (instance);
2883 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2884 continue;
2887 /* Mark all the statements that we want to vectorize as pure SLP and
2888 relevant. */
2889 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2890 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2892 i++;
2894 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2896 delete bb_vinfo;
2897 return NULL;
2900 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2901 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2903 if (dump_enabled_p ())
2904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2905 "not vectorized: bad operation in basic block.\n");
2907 delete bb_vinfo;
2908 return NULL;
2911 /* Cost model: check if the vectorization is worthwhile. */
2912 if (!unlimited_cost_model (NULL)
2913 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2915 if (dump_enabled_p ())
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2917 "not vectorized: vectorization is not "
2918 "profitable.\n");
2920 delete bb_vinfo;
2921 return NULL;
2924 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_NOTE, vect_location,
2926 "Basic block will be vectorized using SLP\n");
2928 return bb_vinfo;
2932 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2933 true if anything in the basic-block was vectorized. */
2935 bool
2936 vect_slp_bb (basic_block bb)
2938 bb_vec_info bb_vinfo;
2939 gimple_stmt_iterator gsi;
2940 unsigned int vector_sizes;
2941 bool any_vectorized = false;
2943 if (dump_enabled_p ())
2944 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2946 /* Autodetect the first vector size we try. */
2947 current_vector_size = 0;
2948 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2950 gsi = gsi_start_bb (bb);
2952 while (1)
2954 if (gsi_end_p (gsi))
2955 break;
2957 gimple_stmt_iterator region_begin = gsi;
2958 vec<data_reference_p> datarefs = vNULL;
2959 int insns = 0;
2961 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2963 gimple *stmt = gsi_stmt (gsi);
2964 if (is_gimple_debug (stmt))
2965 continue;
2966 insns++;
2968 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2969 vect_location = gimple_location (stmt);
2971 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2972 break;
2975 /* Skip leading unhandled stmts. */
2976 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2978 gsi_next (&gsi);
2979 continue;
2982 gimple_stmt_iterator region_end = gsi;
2984 bool vectorized = false;
2985 bool fatal = false;
2986 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2987 datarefs, insns, fatal);
2988 if (bb_vinfo
2989 && dbg_cnt (vect_slp))
2991 if (dump_enabled_p ())
2992 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2994 vect_schedule_slp (bb_vinfo);
2996 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_NOTE, vect_location,
2998 "basic block part vectorized\n");
3000 vectorized = true;
3002 delete bb_vinfo;
3004 any_vectorized |= vectorized;
3006 vector_sizes &= ~current_vector_size;
3007 if (vectorized
3008 || vector_sizes == 0
3009 || current_vector_size == 0
3010 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3011 vector sizes will fail do not bother iterating. */
3012 || fatal)
3014 if (gsi_end_p (region_end))
3015 break;
3017 /* Skip the unhandled stmt. */
3018 gsi_next (&gsi);
3020 /* And reset vector sizes. */
3021 current_vector_size = 0;
3022 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3024 else
3026 /* Try the next biggest vector size. */
3027 current_vector_size = 1 << floor_log2 (vector_sizes);
3028 if (dump_enabled_p ())
3029 dump_printf_loc (MSG_NOTE, vect_location,
3030 "***** Re-trying analysis with "
3031 "vector size %d\n", current_vector_size);
3033 /* Start over. */
3034 gsi = region_begin;
3038 return any_vectorized;
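/* [Editor's illustration, not part of this file]  How the retry loop above
   walks the remaining vector-size mask from largest to smallest, assuming
   the target advertised sizes 16 | 32 | 64; floor_log2_u stands in for
   GCC's floor_log2.  */
#include <stdio.h>

static int
floor_log2_u (unsigned x)
{
  int l = -1;
  while (x) { x >>= 1; l++; }
  return l;
}

int
main (void)
{
  unsigned sizes = 16 | 32 | 64;
  while (sizes)
    {
      unsigned current = 1u << floor_log2_u (sizes);  /* largest remaining */
      printf ("trying vector size %u\n", current);    /* 64, then 32, then 16 */
      sizes &= ~current;
    }
  return 0;
}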
3042 /* Return true if the vector type of the boolean constant which is operand
3043 OPNUM of statement STMT is a boolean vector. */
3045 static bool
3046 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3048 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3049 enum tree_code code = gimple_expr_code (stmt);
3050 tree op, vectype;
3051 gimple *def_stmt;
3052 enum vect_def_type dt;
3054 /* For comparisons and COND_EXPR the type is chosen depending
3055 on the other comparison operand. */
3056 if (TREE_CODE_CLASS (code) == tcc_comparison)
3058 if (opnum)
3059 op = gimple_assign_rhs1 (stmt);
3060 else
3061 op = gimple_assign_rhs2 (stmt);
3063 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3064 &dt, &vectype))
3065 gcc_unreachable ();
3067 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3070 if (code == COND_EXPR)
3072 tree cond = gimple_assign_rhs1 (stmt);
3074 if (TREE_CODE (cond) == SSA_NAME)
3075 op = cond;
3076 else if (opnum)
3077 op = TREE_OPERAND (cond, 1);
3078 else
3079 op = TREE_OPERAND (cond, 0);
3081 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3082 &dt, &vectype))
3083 gcc_unreachable ();
3085 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3088 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3092 /* For constant and loop invariant defs of SLP_NODE this function returns
3093 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3094 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3095 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3099 static void
3100 vect_get_constant_vectors (tree op, slp_tree slp_node,
3101 vec<tree> *vec_oprnds,
3102 unsigned int op_num, unsigned int number_of_vectors)
3104 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3105 gimple *stmt = stmts[0];
3106 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3107 unsigned nunits;
3108 tree vec_cst;
3109 unsigned j, number_of_places_left_in_vector;
3110 tree vector_type;
3111 tree vop;
3112 int group_size = stmts.length ();
3113 unsigned int vec_num, i;
3114 unsigned number_of_copies = 1;
3115 vec<tree> voprnds;
3116 voprnds.create (number_of_vectors);
3117 bool constant_p, is_store;
3118 tree neutral_op = NULL;
3119 enum tree_code code = gimple_expr_code (stmt);
3120 gimple_seq ctor_seq = NULL;
3122 /* Check if vector type is a boolean vector. */
3123 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3124 && vect_mask_constant_operand_p (stmt, op_num))
3125 vector_type
3126 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3127 else
3128 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3129 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3131 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3133 is_store = true;
3134 op = gimple_assign_rhs1 (stmt);
3136 else
3137 is_store = false;
3139 gcc_assert (op);
3141 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3142 created vectors. It is greater than 1 if unrolling is performed.
3144 For example, we have two scalar operands, s1 and s2 (e.g., group of
3145 strided accesses of size two), while NUNITS is four (i.e., four scalars
3146 of this type can be packed in a vector). The output vector will contain
3147 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3148 will be 2).
3150 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3151 containing the operands.
3153 For example, NUNITS is four as before, and the group size is 8
3154 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3155 {s5, s6, s7, s8}. */
3157 number_of_copies = nunits * number_of_vectors / group_size;
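   /* [Editor's note]  Plugging in the numbers from the comment above: with
      NUNITS == 4, GROUP_SIZE == 2 and NUMBER_OF_VECTORS == 1 this gives
      4 * 1 / 2 == 2 copies ({s1, s2, s1, s2}); with GROUP_SIZE == 8 and
      NUMBER_OF_VECTORS == 2 it gives 4 * 2 / 8 == 1.  */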
3159 number_of_places_left_in_vector = nunits;
3160 constant_p = true;
3161 auto_vec<tree, 32> elts (nunits);
3162 elts.quick_grow (nunits);
3163 bool place_after_defs = false;
3164 for (j = 0; j < number_of_copies; j++)
3166 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3168 if (is_store)
3169 op = gimple_assign_rhs1 (stmt);
3170 else
3172 switch (code)
3174 case COND_EXPR:
3176 tree cond = gimple_assign_rhs1 (stmt);
3177 if (TREE_CODE (cond) == SSA_NAME)
3178 op = gimple_op (stmt, op_num + 1);
3179 else if (op_num == 0 || op_num == 1)
3180 op = TREE_OPERAND (cond, op_num);
3181 else
3183 if (op_num == 2)
3184 op = gimple_assign_rhs2 (stmt);
3185 else
3186 op = gimple_assign_rhs3 (stmt);
3189 break;
3191 case CALL_EXPR:
3192 op = gimple_call_arg (stmt, op_num);
3193 break;
3195 case LSHIFT_EXPR:
3196 case RSHIFT_EXPR:
3197 case LROTATE_EXPR:
3198 case RROTATE_EXPR:
3199 op = gimple_op (stmt, op_num + 1);
3200 /* Unlike the other binary operators, shifts/rotates have
3201 the shift count being int, instead of the same type as
3202 the lhs, so make sure the scalar is the right type if
3203 we are dealing with vectors of
3204 long long/long/short/char. */
3205 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3206 op = fold_convert (TREE_TYPE (vector_type), op);
3207 break;
3209 default:
3210 op = gimple_op (stmt, op_num + 1);
3211 break;
3215 /* Create 'vect_ = {op0,op1,...,opn}'. */
3216 number_of_places_left_in_vector--;
3217 tree orig_op = op;
3218 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3220 if (CONSTANT_CLASS_P (op))
3222 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3224 /* Can't use VIEW_CONVERT_EXPR for booleans because
3225 of possibly different sizes of scalar value and
3226 vector element. */
3227 if (integer_zerop (op))
3228 op = build_int_cst (TREE_TYPE (vector_type), 0);
3229 else if (integer_onep (op))
3230 op = build_all_ones_cst (TREE_TYPE (vector_type));
3231 else
3232 gcc_unreachable ();
3234 else
3235 op = fold_unary (VIEW_CONVERT_EXPR,
3236 TREE_TYPE (vector_type), op);
3237 gcc_assert (op && CONSTANT_CLASS_P (op));
3239 else
3241 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3242 gimple *init_stmt;
3243 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3245 tree true_val
3246 = build_all_ones_cst (TREE_TYPE (vector_type));
3247 tree false_val
3248 = build_zero_cst (TREE_TYPE (vector_type));
3249 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3250 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3251 op, true_val,
3252 false_val);
3254 else
3256 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3257 op);
3258 init_stmt
3259 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3260 op);
3262 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3263 op = new_temp;
3266 elts[number_of_places_left_in_vector] = op;
3267 if (!CONSTANT_CLASS_P (op))
3268 constant_p = false;
3269 if (TREE_CODE (orig_op) == SSA_NAME
3270 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3271 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3272 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3273 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3274 place_after_defs = true;
3276 if (number_of_places_left_in_vector == 0)
3278 if (constant_p)
3279 vec_cst = build_vector (vector_type, elts);
3280 else
3282 vec<constructor_elt, va_gc> *v;
3283 unsigned k;
3284 vec_alloc (v, nunits);
3285 for (k = 0; k < nunits; ++k)
3286 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3287 vec_cst = build_constructor (vector_type, v);
3289 tree init;
3290 gimple_stmt_iterator gsi;
3291 if (place_after_defs)
3293 gsi = gsi_for_stmt
3294 (vect_find_last_scalar_stmt_in_slp (slp_node));
3295 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3297 else
3298 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3299 if (ctor_seq != NULL)
3301 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3302 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3303 GSI_SAME_STMT);
3304 ctor_seq = NULL;
3306 voprnds.quick_push (init);
3307 place_after_defs = false;
3308 number_of_places_left_in_vector = nunits;
3309 constant_p = true;
3314 /* Since the vectors are created in the reverse order, we should reverse
3315 them. */
3316 vec_num = voprnds.length ();
3317 for (j = vec_num; j != 0; j--)
3319 vop = voprnds[j - 1];
3320 vec_oprnds->quick_push (vop);
3323 voprnds.release ();
3325 /* If VF is greater than the unrolling factor needed for the SLP group of
3326 stmts, NUMBER_OF_VECTORS to be created is greater than
3327 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3328 to replicate the vectors. */
3329 while (number_of_vectors > vec_oprnds->length ())
3331 tree neutral_vec = NULL;
3333 if (neutral_op)
3335 if (!neutral_vec)
3336 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3338 vec_oprnds->quick_push (neutral_vec);
3340 else
3342 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3343 vec_oprnds->quick_push (vop);
3349 /* Get vectorized definitions from SLP_NODE that contains corresponding
3350 vectorized def-stmts. */
3352 static void
3353 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3355 tree vec_oprnd;
3356 gimple *vec_def_stmt;
3357 unsigned int i;
3359 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3361 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3363 gcc_assert (vec_def_stmt);
3364 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3365 vec_oprnd = gimple_phi_result (vec_def_stmt);
3366 else
3367 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3368 vec_oprnds->quick_push (vec_oprnd);
3373 /* Get vectorized definitions for SLP_NODE.
3374 If the scalar definitions are loop invariants or constants, collect them and
3375 call vect_get_constant_vectors() to create vector stmts.
3376 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3377 must be stored in the corresponding child of SLP_NODE, and we call
3378 vect_get_slp_vect_defs () to retrieve them. */
3380 void
3381 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3382 vec<vec<tree> > *vec_oprnds)
3384 gimple *first_stmt;
3385 int number_of_vects = 0, i;
3386 unsigned int child_index = 0;
3387 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3388 slp_tree child = NULL;
3389 vec<tree> vec_defs;
3390 tree oprnd;
3391 bool vectorized_defs;
3393 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3394 FOR_EACH_VEC_ELT (ops, i, oprnd)
3396 /* For each operand we check if it has vectorized definitions in a child
3397 node or we need to create them (for invariants and constants). We
3398 check if the LHS of the first stmt of the next child matches OPRND.
3399 If it does, we found the correct child. Otherwise, we call
3400 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3401 to check this child node for the next operand. */
3402 vectorized_defs = false;
3403 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3405 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3407 /* We have to check both pattern and original def, if available. */
3408 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3410 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3411 gimple *related
3412 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3413 tree first_def_op;
3415 if (gimple_code (first_def) == GIMPLE_PHI)
3416 first_def_op = gimple_phi_result (first_def);
3417 else
3418 first_def_op = gimple_get_lhs (first_def);
3419 if (operand_equal_p (oprnd, first_def_op, 0)
3420 || (related
3421 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3423 /* The number of vector defs is determined by the number of
3424 vector statements in the node from which we get those
3425 statements. */
3426 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3427 vectorized_defs = true;
3428 child_index++;
3431 else
3432 child_index++;
3435 if (!vectorized_defs)
3437 if (i == 0)
3439 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3440 /* Number of vector stmts was calculated according to LHS in
3441 vect_schedule_slp_instance (), fix it by replacing LHS with
3442 RHS, if necessary. See vect_get_smallest_scalar_type () for
3443 details. */
3444 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3445 &rhs_size_unit);
3446 if (rhs_size_unit != lhs_size_unit)
3448 number_of_vects *= rhs_size_unit;
3449 number_of_vects /= lhs_size_unit;
3454 /* Allocate memory for vectorized defs. */
3455 vec_defs = vNULL;
3456 vec_defs.create (number_of_vects);
3458 /* For reduction defs we call vect_get_constant_vectors (), since we are
3459 looking for initial loop invariant values. */
3460 if (vectorized_defs)
3461 /* The defs are already vectorized. */
3462 vect_get_slp_vect_defs (child, &vec_defs);
3463 else
3464 /* Build vectors from scalar defs. */
3465 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3466 number_of_vects);
3468 vec_oprnds->quick_push (vec_defs);
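/* [Editor's note]  For the LHS/RHS adjustment above: if the group stores
   chars computed from ints, LHS_SIZE_UNIT == 1 and RHS_SIZE_UNIT == 4, so
   four times as many vector defs are needed for the RHS operands as the
   LHS-based count suggested.  */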
3472 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3473 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3474 permute statements for the SLP node NODE of the SLP instance
3475 SLP_NODE_INSTANCE. */
3477 bool
3478 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3479 gimple_stmt_iterator *gsi, int vf,
3480 slp_instance slp_node_instance, bool analyze_only,
3481 unsigned *n_perms)
3483 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3485 tree mask_element_type = NULL_TREE, mask_type;
3486 int nunits, vec_index = 0;
3487 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3488 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3489 int mask_element;
3490 machine_mode mode;
3492 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3493 return false;
3495 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3497 mode = TYPE_MODE (vectype);
3499 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3500 same size as the vector element being permuted. */
3501 mask_element_type = lang_hooks.types.type_for_mode
3502 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3503 mask_type = get_vectype_for_scalar_type (mask_element_type);
3504 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3505 auto_vec_perm_indices mask (nunits);
3506 mask.quick_grow (nunits);
3508 /* Initialize the vect stmts of NODE to properly insert the generated
3509 stmts later. */
3510 if (! analyze_only)
3511 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3512 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3513 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3515 /* Generate permutation masks for every NODE. Number of masks for each NODE
3516 is equal to GROUP_SIZE.
3517 E.g., we have a group of three nodes with three loads from the same
3518 location in each node, and the vector size is 4. I.e., we have an
3519 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3520 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3521 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3524 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3525 The last mask is illegal since we assume two operands for permute
3526 operation, and the mask element values can't be outside that range.
3527 Hence, the last mask must be converted into {2,5,5,5}.
3528 For the first two permutations we need the first and the second input
3529 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3530 we need the second and the third vectors: {b1,c1,a2,b2} and
3531 {c2,a3,b3,c3}. */
3533 int vect_stmts_counter = 0;
3534 int index = 0;
3535 int first_vec_index = -1;
3536 int second_vec_index = -1;
3537 bool noop_p = true;
3538 *n_perms = 0;
3540 for (int j = 0; j < vf; j++)
3542 for (int k = 0; k < group_size; k++)
3544 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3545 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3546 vec_index = i / nunits;
3547 mask_element = i % nunits;
3548 if (vec_index == first_vec_index
3549 || first_vec_index == -1)
3551 first_vec_index = vec_index;
3553 else if (vec_index == second_vec_index
3554 || second_vec_index == -1)
3556 second_vec_index = vec_index;
3557 mask_element += nunits;
3559 else
3561 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3564 "permutation requires at "
3565 "least three vectors ");
3566 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3567 stmt, 0);
3569 return false;
3572 gcc_assert (mask_element >= 0
3573 && mask_element < 2 * nunits);
3574 if (mask_element != index)
3575 noop_p = false;
3576 mask[index++] = mask_element;
3578 if (index == nunits)
3580 if (! noop_p
3581 && ! can_vec_perm_p (mode, false, &mask))
3583 if (dump_enabled_p ())
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3586 vect_location,
3587 "unsupported vect permute { ");
3588 for (i = 0; i < nunits; ++i)
3589 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3590 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3592 return false;
3595 if (! noop_p)
3596 ++*n_perms;
3598 if (!analyze_only)
3600 tree mask_vec = NULL_TREE;
3602 if (! noop_p)
3604 auto_vec<tree, 32> mask_elts (nunits);
3605 for (int l = 0; l < nunits; ++l)
3606 mask_elts.quick_push (build_int_cst (mask_element_type,
3607 mask[l]));
3608 mask_vec = build_vector (mask_type, mask_elts);
3611 if (second_vec_index == -1)
3612 second_vec_index = first_vec_index;
3614 /* Generate the permute statement if necessary. */
3615 tree first_vec = dr_chain[first_vec_index];
3616 tree second_vec = dr_chain[second_vec_index];
3617 gimple *perm_stmt;
3618 if (! noop_p)
3620 tree perm_dest
3621 = vect_create_destination_var (gimple_assign_lhs (stmt),
3622 vectype);
3623 perm_dest = make_ssa_name (perm_dest);
3624 perm_stmt = gimple_build_assign (perm_dest,
3625 VEC_PERM_EXPR,
3626 first_vec, second_vec,
3627 mask_vec);
3628 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3630 else
3631 /* If mask was NULL_TREE generate the requested
3632 identity transform. */
3633 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3635 /* Store the vector statement in NODE. */
3636 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3639 index = 0;
3640 first_vec_index = -1;
3641 second_vec_index = -1;
3642 noop_p = true;
3647 return true;
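/* [Editor's illustration, not part of this file]  The index arithmetic of
   the mask loop above in isolation: with load permutation {1, 0}, an
   interleaving group of size 2 and 4-lane vectors, element K of copy J
   comes from flat index perm[K] + J * 2.  */
#include <stdio.h>

int
main (void)
{
  const int perm[2] = { 1, 0 };
  const int group_size = 2, nunits = 4, vf = 4;
  for (int j = 0; j < vf; j++)
    for (int k = 0; k < group_size; k++)
      {
        int i = perm[k] + j * group_size;
        printf ("input vector %d, lane %d\n", i / nunits, i % nunits);
      }
  return 0;
}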
3652 /* Vectorize SLP instance tree in postorder. */
3654 static bool
3655 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3656 unsigned int vectorization_factor)
3658 gimple *stmt;
3659 bool grouped_store, is_store;
3660 gimple_stmt_iterator si;
3661 stmt_vec_info stmt_info;
3662 unsigned int vec_stmts_size, nunits, group_size;
3663 tree vectype;
3664 int i, j;
3665 slp_tree child;
3667 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3668 return false;
3670 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3671 vect_schedule_slp_instance (child, instance, vectorization_factor);
3673 /* Push SLP node def-type to stmts. */
3674 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3675 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3676 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3677 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3679 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3680 stmt_info = vinfo_for_stmt (stmt);
3682 /* VECTYPE is the type of the destination. */
3683 vectype = STMT_VINFO_VECTYPE (stmt_info);
3684 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3685 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3687 /* For each SLP instance calculate number of vector stmts to be created
3688 for the scalar stmts in each node of the SLP tree. Number of vector
3689 elements in one vector iteration is the number of scalar elements in
3690 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3691 size.
3692 Unless this is a SLP reduction in which case the number of vector
3693 stmts is equal to the number of vector stmts of the children. */
3694 if (GROUP_FIRST_ELEMENT (stmt_info)
3695 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3696 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3697 else
3698 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3700 if (!SLP_TREE_VEC_STMTS (node).exists ())
3702 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3703 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3706 if (dump_enabled_p ())
3708 dump_printf_loc (MSG_NOTE,vect_location,
3709 "------>vectorizing SLP node starting from: ");
3710 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3713 /* Vectorized stmts go before the last scalar stmt which is where
3714 all uses are ready. */
3715 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3717 /* Mark the first element of the reduction chain as reduction to properly
3718 transform the node. In the analysis phase only the last element of the
3719 chain is marked as reduction. */
3720 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3721 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3723 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3724 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3727 /* Handle two-operation SLP nodes by vectorizing the group with
3728 both operations and then performing a merge. */
3729 if (SLP_TREE_TWO_OPERATORS (node))
3731 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3732 enum tree_code ocode = ERROR_MARK;
3733 gimple *ostmt;
3734 auto_vec_perm_indices mask (group_size);
3735 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3736 if (gimple_assign_rhs_code (ostmt) != code0)
3738 mask.quick_push (1);
3739 ocode = gimple_assign_rhs_code (ostmt);
3741 else
3742 mask.quick_push (0);
3743 if (ocode != ERROR_MARK)
3745 vec<gimple *> v0;
3746 vec<gimple *> v1;
3747 unsigned j;
3748 tree tmask = NULL_TREE;
3749 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3750 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3751 SLP_TREE_VEC_STMTS (node).truncate (0);
3752 gimple_assign_set_rhs_code (stmt, ocode);
3753 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3754 gimple_assign_set_rhs_code (stmt, code0);
3755 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3756 SLP_TREE_VEC_STMTS (node).truncate (0);
3757 tree meltype = build_nonstandard_integer_type
3758 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3759 tree mvectype = get_same_sized_vectype (meltype, vectype);
3760 unsigned k = 0, l;
3761 for (j = 0; j < v0.length (); ++j)
3763 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3764 auto_vec<tree, 32> melts (nunits);
3765 for (l = 0; l < nunits; ++l)
3767 if (k >= group_size)
3768 k = 0;
3769 tree t = build_int_cst (meltype, mask[k++] * nunits + l);
3770 melts.quick_push (t);
3772 tmask = build_vector (mvectype, melts);
3774 /* ??? Not all targets support a VEC_PERM_EXPR with a
3775 constant mask that would translate to a vec_merge RTX
3776 (with their vec_perm_const_ok). We can either not
3777 vectorize in that case or let veclower do its job.
3778 Unfortunately that isn't too great and at least for
3779 plus/minus we'd eventually like to match targets'
3780 vector addsub instructions. */
3781 gimple *vstmt;
3782 vstmt = gimple_build_assign (make_ssa_name (vectype),
3783 VEC_PERM_EXPR,
3784 gimple_assign_lhs (v0[j]),
3785 gimple_assign_lhs (v1[j]), tmask);
3786 vect_finish_stmt_generation (stmt, vstmt, &si);
3787 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3789 v0.release ();
3790 v1.release ();
3791 return false;
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore the stmt def-types that were overridden with the SLP node
     def-type before transforming NODE.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with assignments setting their
   lhs to zero.  For loop vectorization this is done in vectorizable_call,
   but for SLP it needs to be deferred until the end of vect_schedule_slp,
   because multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
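  /* Basic-block SLP involves no unrolling, so its vectorization factor
     is one.  */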
  if (is_a <loop_vec_info> (vinfo))
    vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
  else
    vf = 1;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we could as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);
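      /* Remove the scalar stores of this vectorized group; as memory
         side-effects they are not cleaned up by DCE, so the vectorizer
         has to delete them itself.  */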
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}