gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOCATION
   if not.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}

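/* Illustrative note (not part of the original source): for an assignment
   like x = a + b, gimple_num_ops is 3, so NOPS above is 2.  For a
   COND_EXPR such as x = p < q ? a : b the rhs has three operands and the
   embedded comparison contributes an extra one, hence the nops++: the SLP
   build treats p, q, a and b as four child operands.  */
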
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

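/* Illustrative example (not from the original source): for an interleaving
   chain of loads a[0] -> a[1] -> a[2] -> a[3] whose GROUP_FIRST_ELEMENT is
   the load of a[0], the load of a[2] is at place 2; a load whose chain
   starts at a different first stmt yields -1.  */
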
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1, if the error could be corrected by swapping operands of the
   operation return 1, if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}

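/* Illustrative example (not from the original source): suppose the group is
     x0 = a0 + 3;
     x1 = 5 + a1;
   For x1 the def of operand zero is a constant while the first stmt recorded
   an internal def there, so the check fails at i == 0; since PLUS_EXPR is
   commutative the loop retries with SWAPPED set, a1 and 5 then match the
   recorded def types, and the fixup above rewrites x1 as a1 + 5.  */
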
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
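              /* E.g., *max_nunits == 4 with group_size == 6 gives
                 lcm (4, 6) / 6 == 2, i.e. two copies of the group are
                 needed to fill whole vectors (illustrative example, not
                 part of the original source).  */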
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

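/* Illustrative note (not part of the original source): for a group of four
   stmts where stmt 2 uses MINUS_EXPR while the others use PLUS_EXPR, the
   loop above leaves MATCHES as { true, true, false, true } and the function
   returns false.  A failure on stmt 0 (MATCHES[0] == false) is fatal for the
   whole group; otherwise the caller can use MATCHES to retry with swapped
   operands.  */
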
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                           stmt, (i == 0), &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again ... */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}

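/* Illustrative note (not part of the original source): for a store group
     a[i]   = b[i]   + c[i];
     a[i+1] = b[i+1] + c[i+1];
   the SLP tree built here has a root node holding the two stores, one child
   holding the two additions, and two grandchildren holding the loads from
   b[] and c[].  The load nodes terminate the recursion.  */
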
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

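/* Illustrative example (not from the original source): with scalar stmts
   { s0, s1, s2 } and PERMUTATION { 2, 0, 1 }, each stmt moves to slot
   PERMUTATION[i], giving { s1, s2, s0 }.  */
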
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}

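/* Illustrative example (not from the original source) of the FORNOW case
   above: with group_size == 2 and two load nodes carrying permutations
   { 0, 0 } and { 1, 1 }, every node reads a single distinct element of the
   chain, matching the supported 0..01..1.. pattern; a node with permutation
   { 0, 1 } would be rejected by the "all entries equal" check.  */
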
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
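  /* E.g., nunits == 4 with group_size == 6 gives lcm (4, 6) / 4 == 3,
     i.e. three vector stmts are needed to cover the unrolled group
     (illustrative example, not part of the original source).  */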

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
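  /* E.g., nunits == 4 with group_size == 6 yields an unrolling factor of
     lcm (4, 6) / 6 == 2: the loop must be unrolled twice before whole
     vectors can be formed (illustrative example, not part of the original
     source).  */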
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, NULL, NULL, NULL,
                           max_tree_size))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                  unsigned max_tree_size)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
                                   max_tree_size))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
                                       max_tree_size))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
                                    max_tree_size))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = stmts[0];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))
                || (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
                    && STMT_VINFO_RELATED_STMT (stmt_vinfo)
                    && !STMT_SLP_TYPE (vinfo_for_stmt
                                         (STMT_VINFO_RELATED_STMT
                                            (stmt_vinfo)))))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                      == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts (child);
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  vect_destroy_datarefs (NULL, bb_vinfo);
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, child))
      return false;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

1995 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1996 operations are supported. */
1998 static bool
1999 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2001 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2002 slp_instance instance;
2003 int i;
2005 for (i = 0; slp_instances.iterate (i, &instance); )
2007 if (!vect_slp_analyze_node_operations (bb_vinfo,
2008 SLP_INSTANCE_TREE (instance)))
2010 vect_free_slp_instance (instance);
2011 slp_instances.ordered_remove (i);
2013 else
2014 i++;
2017 if (!slp_instances.length ())
2018 return false;
2020 return true;
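
/* An illustrative aside (not part of the pass): the loop above removes
   failing instances without advancing the index, so the element that
   ordered_remove shifts into slot I is analyzed next.  A minimal
   standalone sketch of the same idiom over an int array, where the
   hypothetical keep () stands in for vect_slp_analyze_node_operations.  */

#include <string.h>
#include <stdbool.h>

static bool
keep (int x)
{
  return (x & 1) == 0;  /* Keep even values.  */
}

static unsigned
filter_in_place (int *a, unsigned n)
{
  unsigned i = 0;
  while (i < n)
    {
      if (!keep (a[i]))
        {
          /* Ordered removal, like slp_instances.ordered_remove (i):
             shift the tail left by one and do not advance I.  */
          memmove (&a[i], &a[i + 1], (n - i - 1) * sizeof (a[0]));
          n--;
        }
      else
        i++;
    }
  return n;  /* The new length.  */
}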
2024 /* Compute the scalar cost of the SLP node NODE and its children
2025 and return it. Do not account for defs that are marked in LIFE;
2026 update LIFE according to the uses of NODE. */
2028 static unsigned
2029 vect_bb_slp_scalar_cost (basic_block bb,
2030 slp_tree node, vec<bool, va_heap> *life)
2032 unsigned scalar_cost = 0;
2033 unsigned i;
2034 gimple stmt;
2035 slp_tree child;
2037 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2039 unsigned stmt_cost;
2040 ssa_op_iter op_iter;
2041 def_operand_p def_p;
2042 stmt_vec_info stmt_info;
2044 if ((*life)[i])
2045 continue;
2047 /* If there is a non-vectorized use of the defs, the scalar
2048 stmt is kept live, in which case we account neither it nor any
2049 required defs in the SLP children in the scalar cost. This
2050 way we make the vectorization more costly when compared to
2051 the scalar cost. */
2052 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2054 imm_use_iterator use_iter;
2055 gimple use_stmt;
2056 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2057 if (!is_gimple_debug (use_stmt)
2058 && (gimple_code (use_stmt) == GIMPLE_PHI
2059 || gimple_bb (use_stmt) != bb
2060 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2062 (*life)[i] = true;
2063 BREAK_FROM_IMM_USE_STMT (use_iter);
2066 if ((*life)[i])
2067 continue;
2069 stmt_info = vinfo_for_stmt (stmt);
2070 if (STMT_VINFO_DATA_REF (stmt_info))
2072 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2073 stmt_cost = vect_get_stmt_cost (scalar_load);
2074 else
2075 stmt_cost = vect_get_stmt_cost (scalar_store);
2077 else
2078 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2080 scalar_cost += stmt_cost;
2083 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2084 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2086 return scalar_cost;
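
/* An illustrative aside (not part of the pass): the accounting above in
   toy form.  The hypothetical escapes[] array stands in for the
   FOR_EACH_IMM_USE_STMT walk that detects non-vectorized uses, and
   fixed costs of 2 for loads/stores and 1 for other stmts stand in for
   vect_get_stmt_cost.  */

#include <stdbool.h>

enum toy_kind { TOY_LOAD, TOY_STORE, TOY_STMT };

static unsigned
toy_scalar_cost (const enum toy_kind *kind, const bool *escapes,
                 bool *life, unsigned n)
{
  unsigned i, cost = 0;
  for (i = 0; i < n; i++)
    {
      if (life[i])
        continue;
      if (escapes[i])
        {
          /* The scalar stmt stays live, so it is not counted; this
             makes vectorization look relatively more expensive.  */
          life[i] = true;
          continue;
        }
      cost += kind[i] == TOY_STMT ? 1 : 2;
    }
  return cost;
}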
2089 /* Check if vectorization of the basic block is profitable. */
2091 static bool
2092 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2094 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2095 slp_instance instance;
2096 int i, j;
2097 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2098 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2099 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2100 stmt_vec_info stmt_info = NULL;
2101 stmt_vector_for_cost body_cost_vec;
2102 stmt_info_for_cost *ci;
2104 /* Calculate vector costs. */
2105 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2107 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2109 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2111 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2112 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2113 stmt_info, ci->misalign, vect_body);
2117 /* Calculate scalar cost. */
2118 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2120 auto_vec<bool, 20> life;
2121 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2122 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2123 SLP_INSTANCE_TREE (instance),
2124 &life);
2127 /* Complete the target-specific cost calculation. */
2128 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2129 &vec_inside_cost, &vec_epilogue_cost);
2131 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2133 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2136 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2137 vec_inside_cost);
2138 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2139 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2140 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2143 /* Vectorization is profitable if its cost is less than the cost of the
2144 scalar version. */
2145 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2146 return false;
2148 return true;
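
/* An illustrative aside (not part of the pass): the decision above boils
   down to a single comparison.  A minimal sketch, with the costs passed
   in directly instead of being accumulated by add_stmt_cost and
   finish_cost.  */

#include <stdbool.h>

static bool
toy_bb_profitable_p (unsigned vec_inside_cost, unsigned vec_prologue_cost,
                     unsigned vec_epilogue_cost, unsigned scalar_cost)
{
  unsigned vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
  /* Profitable only if the full vector cost is strictly below the
     scalar cost of the block.  */
  return vec_outside_cost + vec_inside_cost < scalar_cost;
}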
2151 /* Check if the basic block can be vectorized. */
2153 static bb_vec_info
2154 vect_slp_analyze_bb_1 (basic_block bb)
2156 bb_vec_info bb_vinfo;
2157 vec<slp_instance> slp_instances;
2158 slp_instance instance;
2159 int i;
2160 int min_vf = 2;
2161 unsigned n_stmts = 0;
2163 bb_vinfo = new_bb_vec_info (bb);
2164 if (!bb_vinfo)
2165 return NULL;
2167 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2169 if (dump_enabled_p ())
2170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2171 "not vectorized: unhandled data-ref in basic "
2172 "block.\n");
2174 destroy_bb_vec_info (bb_vinfo);
2175 return NULL;
2178 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2180 if (dump_enabled_p ())
2181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2182 "not vectorized: not enough data-refs in "
2183 "basic block.\n");
2185 destroy_bb_vec_info (bb_vinfo);
2186 return NULL;
2189 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2193 "not vectorized: unhandled data access in "
2194 "basic block.\n");
2196 destroy_bb_vec_info (bb_vinfo);
2197 return NULL;
2200 vect_pattern_recog (NULL, bb_vinfo);
2202 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2204 if (dump_enabled_p ())
2205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2206 "not vectorized: bad data alignment in basic "
2207 "block.\n");
2209 destroy_bb_vec_info (bb_vinfo);
2210 return NULL;
2213 /* Check the SLP opportunities in the basic block, analyze and build SLP
2214 trees. */
2215 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2217 if (dump_enabled_p ())
2218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219 "not vectorized: failed to find SLP opportunities "
2220 "in basic block.\n");
2222 destroy_bb_vec_info (bb_vinfo);
2223 return NULL;
2226 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2228 /* Mark all the statements that we want to vectorize as pure SLP and
2229 relevant. */
2230 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2232 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2233 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2236 /* Mark all the statements that we do not want to vectorize. */
2237 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2238 !gsi_end_p (gsi); gsi_next (&gsi))
2240 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2241 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2242 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2245 /* Analyze dependences. At this point all stmts not participating in
2246 vectorization have to be marked. Dependence analysis assumes
2247 that we either vectorize all SLP instances or none at all. */
2248 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2250 if (dump_enabled_p ())
2251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2252 "not vectorized: unhandled data dependence "
2253 "in basic block.\n");
2255 destroy_bb_vec_info (bb_vinfo);
2256 return NULL;
2259 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2261 if (dump_enabled_p ())
2262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2263 "not vectorized: unsupported alignment in basic "
2264 "block.\n");
2265 destroy_bb_vec_info (bb_vinfo);
2266 return NULL;
2269 if (!vect_slp_analyze_operations (bb_vinfo))
2271 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2273 "not vectorized: bad operation in basic block.\n");
2275 destroy_bb_vec_info (bb_vinfo);
2276 return NULL;
2279 /* Cost model: check if the vectorization is worthwhile. */
2280 if (!unlimited_cost_model (NULL)
2281 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2285 "not vectorized: vectorization is not "
2286 "profitable.\n");
2288 destroy_bb_vec_info (bb_vinfo);
2289 return NULL;
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_NOTE, vect_location,
2294 "Basic block will be vectorized using SLP\n");
2296 return bb_vinfo;
2300 bb_vec_info
2301 vect_slp_analyze_bb (basic_block bb)
2303 bb_vec_info bb_vinfo;
2304 int insns = 0;
2305 gimple_stmt_iterator gsi;
2306 unsigned int vector_sizes;
2308 if (dump_enabled_p ())
2309 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2311 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2313 gimple stmt = gsi_stmt (gsi);
2314 if (!is_gimple_debug (stmt)
2315 && !gimple_nop_p (stmt)
2316 && gimple_code (stmt) != GIMPLE_LABEL)
2317 insns++;
2320 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "not vectorized: too many instructions in "
2325 "basic block.\n");
2327 return NULL;
2331 /* Autodetect the first vector size we try. */
2331 current_vector_size = 0;
2332 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2334 while (1)
2336 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2337 if (bb_vinfo)
2338 return bb_vinfo;
2340 destroy_bb_vec_info (bb_vinfo);
2342 vector_sizes &= ~current_vector_size;
2343 if (vector_sizes == 0
2344 || current_vector_size == 0)
2345 return NULL;
2347 /* Try the largest remaining vector size. */
2348 current_vector_size = 1 << floor_log2 (vector_sizes);
2349 if (dump_enabled_p ())
2350 dump_printf_loc (MSG_NOTE, vect_location,
2351 "***** Re-trying analysis with "
2352 "vector size %d\n", current_vector_size);
2357 /* SLP costs are calculated according to the SLP instance unrolling factor
2358 (i.e., the number of created vector stmts depends on the unrolling factor).
2359 However, the actual number of vector stmts for every SLP node depends on
2360 the VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2361 should be updated. In this function we assume that the inside costs
2362 calculated in vect_model_xxx_cost are linear in ncopies. */
2364 void
2365 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2367 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2368 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2369 slp_instance instance;
2370 stmt_vector_for_cost body_cost_vec;
2371 stmt_info_for_cost *si;
2372 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_NOTE, vect_location,
2376 "=== vect_update_slp_costs_according_to_vf ===\n");
2378 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2380 /* We assume that costs are linear in ncopies. */
2381 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2383 /* Record the instance's instructions in the target cost model.
2384 This was delayed until here because the count of instructions
2385 isn't known beforehand. */
2386 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2388 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2389 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2390 vinfo_for_stmt (si->stmt), si->misalign,
2391 vect_body);
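
/* An illustrative aside (not part of the pass): the scaling above in toy
   form.  With vf = 8 and an instance unrolling factor of 2, every
   recorded statement count is multiplied by ncopies = 4; the arrays
   stand in for the body cost vector and the per-stmt costs.  */

static unsigned
toy_scaled_body_cost (const unsigned *count, const unsigned *cost,
                      unsigned n, unsigned vf, unsigned unrolling_factor)
{
  unsigned i, ncopies = vf / unrolling_factor;
  unsigned total = 0;
  for (i = 0; i < n; i++)
    total += count[i] * ncopies * cost[i];
  return total;
}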
2396 /* For constant and loop-invariant defs of SLP_NODE this function returns
2397 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2398 OP_NUM determines whether we gather defs for operand 0 or operand 1 of the RHS of
2399 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2400 REDUC_INDEX is the index of the reduction operand in the statements, unless
2401 it is -1. */
2403 static void
2404 vect_get_constant_vectors (tree op, slp_tree slp_node,
2405 vec<tree> *vec_oprnds,
2406 unsigned int op_num, unsigned int number_of_vectors,
2407 int reduc_index)
2409 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2410 gimple stmt = stmts[0];
2411 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2412 unsigned nunits;
2413 tree vec_cst;
2414 tree *elts;
2415 unsigned j, number_of_places_left_in_vector;
2416 tree vector_type;
2417 tree vop;
2418 int group_size = stmts.length ();
2419 unsigned int vec_num, i;
2420 unsigned number_of_copies = 1;
2421 vec<tree> voprnds;
2422 voprnds.create (number_of_vectors);
2423 bool constant_p, is_store;
2424 tree neutral_op = NULL;
2425 enum tree_code code = gimple_expr_code (stmt);
2426 gimple def_stmt;
2427 struct loop *loop;
2428 gimple_seq ctor_seq = NULL;
2430 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2431 && reduc_index != -1)
2433 op_num = reduc_index - 1;
2434 op = gimple_op (stmt, reduc_index);
2435 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2436 we need either neutral operands or the original operands. See
2437 get_initial_def_for_reduction() for details. */
2438 switch (code)
2440 case WIDEN_SUM_EXPR:
2441 case DOT_PROD_EXPR:
2442 case PLUS_EXPR:
2443 case MINUS_EXPR:
2444 case BIT_IOR_EXPR:
2445 case BIT_XOR_EXPR:
2446 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2447 neutral_op = build_real (TREE_TYPE (op), dconst0);
2448 else
2449 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2451 break;
2453 case MULT_EXPR:
2454 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2455 neutral_op = build_real (TREE_TYPE (op), dconst1);
2456 else
2457 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2459 break;
2461 case BIT_AND_EXPR:
2462 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2463 break;
2465 /* For MIN/MAX we don't have an easy neutral operand but
2466 the initial values can be used fine here. Only for
2467 a reduction chain do we have to force a neutral element. */
2468 case MAX_EXPR:
2469 case MIN_EXPR:
2470 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2471 neutral_op = NULL;
2472 else
2474 def_stmt = SSA_NAME_DEF_STMT (op);
2475 loop = (gimple_bb (stmt))->loop_father;
2476 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2477 loop_preheader_edge (loop));
2479 break;
2481 default:
2482 neutral_op = NULL;
2486 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2488 is_store = true;
2489 op = gimple_assign_rhs1 (stmt);
2491 else
2492 is_store = false;
2494 gcc_assert (op);
2496 if (CONSTANT_CLASS_P (op))
2497 constant_p = true;
2498 else
2499 constant_p = false;
2501 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2502 gcc_assert (vector_type);
2503 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2505 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2506 created vectors. It is greater than 1 if unrolling is performed.
2508 For example, we have two scalar operands, s1 and s2 (e.g., a group of
2509 strided accesses of size two), while NUNITS is four (i.e., four scalars
2510 of this type can be packed in a vector). The output vector will contain
2511 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2512 will be 2).
2514 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2515 containing the operands.
2517 For example, NUNITS is four as before, and the group size is 8
2518 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2519 {s5, s6, s7, s8}. */
2521 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2523 number_of_places_left_in_vector = nunits;
2524 elts = XALLOCAVEC (tree, nunits);
2525 for (j = 0; j < number_of_copies; j++)
2527 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2529 if (is_store)
2530 op = gimple_assign_rhs1 (stmt);
2531 else
2533 switch (code)
2535 case COND_EXPR:
2536 if (op_num == 0 || op_num == 1)
2538 tree cond = gimple_assign_rhs1 (stmt);
2539 op = TREE_OPERAND (cond, op_num);
2541 else
2543 if (op_num == 2)
2544 op = gimple_assign_rhs2 (stmt);
2545 else
2546 op = gimple_assign_rhs3 (stmt);
2548 break;
2550 case CALL_EXPR:
2551 op = gimple_call_arg (stmt, op_num);
2552 break;
2554 case LSHIFT_EXPR:
2555 case RSHIFT_EXPR:
2556 case LROTATE_EXPR:
2557 case RROTATE_EXPR:
2558 op = gimple_op (stmt, op_num + 1);
2559 /* Unlike the other binary operators, the shift count of
2560 shifts/rotates is int rather than the same type as
2561 the lhs, so make sure the scalar is the right type if
2562 we are dealing with vectors of
2563 long long/long/short/char. */
2564 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2565 op = fold_convert (TREE_TYPE (vector_type), op);
2566 break;
2568 default:
2569 op = gimple_op (stmt, op_num + 1);
2570 break;
2574 if (reduc_index != -1)
2576 loop = (gimple_bb (stmt))->loop_father;
2577 def_stmt = SSA_NAME_DEF_STMT (op);
2579 gcc_assert (loop);
2581 /* Get the def before the loop. In a reduction chain we have only
2582 one initial value. */
2583 if ((j != (number_of_copies - 1)
2584 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2585 && i != 0))
2586 && neutral_op)
2587 op = neutral_op;
2588 else
2589 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2590 loop_preheader_edge (loop));
2593 /* Create 'vect_ = {op0,op1,...,opn}'. */
2594 number_of_places_left_in_vector--;
2595 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2597 if (CONSTANT_CLASS_P (op))
2599 op = fold_unary (VIEW_CONVERT_EXPR,
2600 TREE_TYPE (vector_type), op);
2601 gcc_assert (op && CONSTANT_CLASS_P (op));
2603 else
2605 tree new_temp
2606 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2607 gimple init_stmt;
2608 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2609 op);
2610 init_stmt
2611 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2612 new_temp, op, NULL_TREE);
2613 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2614 op = new_temp;
2617 elts[number_of_places_left_in_vector] = op;
2618 if (!CONSTANT_CLASS_P (op))
2619 constant_p = false;
2621 if (number_of_places_left_in_vector == 0)
2623 number_of_places_left_in_vector = nunits;
2625 if (constant_p)
2626 vec_cst = build_vector (vector_type, elts);
2627 else
2629 vec<constructor_elt, va_gc> *v;
2630 unsigned k;
2631 vec_alloc (v, nunits);
2632 for (k = 0; k < nunits; ++k)
2633 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2634 vec_cst = build_constructor (vector_type, v);
2636 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2637 vector_type, NULL));
2638 if (ctor_seq != NULL)
2640 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2641 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2642 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2643 GSI_SAME_STMT);
2644 ctor_seq = NULL;
2650 /* Since the vectors are created in reverse order, we should invert
2651 them. */
2652 vec_num = voprnds.length ();
2653 for (j = vec_num; j != 0; j--)
2655 vop = voprnds[j - 1];
2656 vec_oprnds->quick_push (vop);
2659 voprnds.release ();
2661 /* If VF is greater than the unrolling factor needed for the SLP
2662 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2663 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2664 to replicate the vectors. */
2665 while (number_of_vectors > vec_oprnds->length ())
2667 tree neutral_vec = NULL;
2669 if (neutral_op)
2671 if (!neutral_vec)
2672 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2674 vec_oprnds->quick_push (neutral_vec);
2676 else
2678 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2679 vec_oprnds->quick_push (vop);
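
/* An illustrative aside (not part of the pass): the NUMBER_OF_COPIES
   arithmetic above, in standalone form.  toy_print_packing (4, 2)
   prints one vector { s1 s2 s1 s2 } (two copies of the group), and
   toy_print_packing (4, 8) prints { s1 s2 s3 s4 } { s5 s6 s7 s8 };
   the real code additionally fills vectors back to front and then
   inverts the result.  toy_gcd and toy_print_packing are hypothetical
   helpers.  */

#include <stdio.h>

static unsigned
toy_gcd (unsigned a, unsigned b)
{
  while (b)
    {
      unsigned t = a % b;
      a = b;
      b = t;
    }
  return a;
}

static void
toy_print_packing (unsigned nunits, unsigned group_size)
{
  unsigned lcm = nunits / toy_gcd (nunits, group_size) * group_size;
  unsigned number_of_copies = lcm / group_size;
  unsigned number_of_vectors = lcm / nunits;
  unsigned i;
  printf ("%u copies, %u vector(s):", number_of_copies, number_of_vectors);
  for (i = 0; i < number_of_vectors * nunits; i++)
    {
      if (i % nunits == 0)
        printf (" {");
      printf (" s%u", i % group_size + 1);
      if ((i + 1) % nunits == 0)
        printf (" }");
    }
  printf ("\n");
}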
2685 /* Get the vectorized definitions from SLP_NODE, which contains the
2686 corresponding vectorized def-stmts. */
2688 static void
2689 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2691 tree vec_oprnd;
2692 gimple vec_def_stmt;
2693 unsigned int i;
2695 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2697 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2699 gcc_assert (vec_def_stmt);
2700 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2701 vec_oprnds->quick_push (vec_oprnd);
2706 /* Get vectorized definitions for SLP_NODE.
2707 If the scalar definitions are loop invariants or constants, collect them and
2708 call vect_get_constant_vectors() to create vector stmts.
2709 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2710 must be stored in the corresponding child of SLP_NODE, and we call
2711 vect_get_slp_vect_defs () to retrieve them. */
2713 void
2714 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2715 vec<vec<tree> > *vec_oprnds, int reduc_index)
2717 gimple first_stmt;
2718 int number_of_vects = 0, i;
2719 unsigned int child_index = 0;
2720 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2721 slp_tree child = NULL;
2722 vec<tree> vec_defs;
2723 tree oprnd;
2724 bool vectorized_defs;
2726 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2727 FOR_EACH_VEC_ELT (ops, i, oprnd)
2729 /* For each operand we check whether it has vectorized definitions in a
2730 child node or whether we need to create them (for invariants and
2731 constants). We check if the LHS of the first stmt of the next child
2732 matches OPRND. If it does, we found the correct child. Otherwise, we
2733 call vect_get_constant_vectors () and do not advance CHILD_INDEX, in
2734 order to check this child node against the next operand. */
2735 vectorized_defs = false;
2736 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2738 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2740 /* We have to check both pattern and original def, if available. */
2741 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2742 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2744 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2745 || (related
2746 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2748 /* The number of vector defs is determined by the number of
2749 vector statements in the node from which we get those
2750 statements. */
2751 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2752 vectorized_defs = true;
2753 child_index++;
2757 if (!vectorized_defs)
2759 if (i == 0)
2761 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2762 /* Number of vector stmts was calculated according to LHS in
2763 vect_schedule_slp_instance (), fix it by replacing LHS with
2764 RHS, if necessary. See vect_get_smallest_scalar_type () for
2765 details. */
2766 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2767 &rhs_size_unit);
2768 if (rhs_size_unit != lhs_size_unit)
2770 number_of_vects *= rhs_size_unit;
2771 number_of_vects /= lhs_size_unit;
2776 /* Allocate memory for vectorized defs. */
2777 vec_defs = vNULL;
2778 vec_defs.create (number_of_vects);
2780 /* For reduction defs we call vect_get_constant_vectors (), since we are
2781 looking for initial loop invariant values. */
2782 if (vectorized_defs && reduc_index == -1)
2783 /* The defs are already vectorized. */
2784 vect_get_slp_vect_defs (child, &vec_defs);
2785 else
2786 /* Build vectors from scalar defs. */
2787 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2788 number_of_vects, reduc_index);
2790 vec_oprnds->quick_push (vec_defs);
2792 /* For reductions, we only need initial values. */
2793 if (reduc_index != -1)
2794 return;
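
/* An illustrative aside (not part of the pass): the LHS/RHS fix-up
   above as one line of arithmetic.  Vector-stmt counts are
   proportional to the scalar element size, since a fixed-width vector
   holds vector_size / element_size elements; e.g. 4 vectors of a
   4-byte LHS need only 1 vector of defs for a 1-byte RHS.  */

static unsigned
toy_operand_vector_count (unsigned lhs_vec_stmts, unsigned lhs_size_unit,
                          unsigned rhs_size_unit)
{
  return lhs_vec_stmts * rhs_size_unit / lhs_size_unit;
}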
2799 /* Create NCOPIES permutation statements using the vector mask MASK
2800 and two input vectors placed in
2801 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2802 shifting by STRIDE elements of DR_CHAIN for every copy.
2803 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2804 copies).
2805 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2806 the created stmts must be inserted. */
2808 static inline void
2809 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2810 tree mask, int first_vec_indx, int second_vec_indx,
2811 gimple_stmt_iterator *gsi, slp_tree node,
2812 tree vectype, vec<tree> dr_chain,
2813 int ncopies, int vect_stmts_counter)
2815 tree perm_dest;
2816 gimple perm_stmt = NULL;
2817 stmt_vec_info next_stmt_info;
2818 int i, stride;
2819 tree first_vec, second_vec, data_ref;
2821 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2823 /* Initialize the vect stmts of NODE to properly insert the generated
2824 stmts later. */
2825 for (i = SLP_TREE_VEC_STMTS (node).length ();
2826 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2827 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2829 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2830 for (i = 0; i < ncopies; i++)
2832 first_vec = dr_chain[first_vec_indx];
2833 second_vec = dr_chain[second_vec_indx];
2835 /* Generate the permute statement. */
2836 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2837 first_vec, second_vec, mask);
2838 data_ref = make_ssa_name (perm_dest, perm_stmt);
2839 gimple_set_lhs (perm_stmt, data_ref);
2840 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2842 /* Store the vector statement in NODE. */
2843 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2845 first_vec_indx += stride;
2846 second_vec_indx += stride;
2849 /* Mark the scalar stmt as vectorized. */
2850 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2851 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
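
/* An illustrative aside (not part of the pass): the slot arithmetic
   above.  The vec-stmts array of the node interleaves the NCOPIES
   copies at a fixed STRIDE, so copy COPY of permutation COUNTER lands
   in slot stride * copy + counter; with 6 vector stmts and 2 copies
   (stride 3), permutations 0..2 occupy slots (0,3), (1,4), (2,5).  */

static unsigned
toy_perm_slot (unsigned n_vec_stmts, unsigned ncopies,
               unsigned copy, unsigned counter)
{
  unsigned stride = n_vec_stmts / ncopies;
  return stride * copy + counter;
}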
2855 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
2856 return in CURRENT_MASK_ELEMENT its equivalent in the target-specific
2857 representation. Check that the mask is valid and return FALSE if not.
2858 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2859 the next vector, i.e., the current first vector is not needed. */
2861 static bool
2862 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2863 int mask_nunits, bool only_one_vec, int index,
2864 unsigned char *mask, int *current_mask_element,
2865 bool *need_next_vector, int *number_of_mask_fixes,
2866 bool *mask_fixed, bool *needs_first_vector)
2868 int i;
2870 /* Convert to target specific representation. */
2871 *current_mask_element = first_mask_element + m;
2872 /* Adjust the value in case it's a mask for second and third vectors. */
2873 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2875 if (*current_mask_element < mask_nunits)
2876 *needs_first_vector = true;
2878 /* We have only one input vector to permute but the mask accesses values in
2879 the next vector as well. */
2880 if (only_one_vec && *current_mask_element >= mask_nunits)
2882 if (dump_enabled_p ())
2884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2885 "permutation requires at least two vectors ");
2886 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2887 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2890 return false;
2893 /* The mask requires the next vector. */
2894 while (*current_mask_element >= mask_nunits * 2)
2896 if (*needs_first_vector || *mask_fixed)
2898 /* We either need the first vector too or have already moved to the
2899 next vector. In both cases, this permutation needs three
2900 vectors. */
2901 if (dump_enabled_p ())
2903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2904 "permutation requires at "
2905 "least three vectors ");
2906 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2907 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2910 return false;
2913 /* We move to the next vector, dropping the first one and working with
2914 the second and the third - we need to adjust the values of the mask
2915 accordingly. */
2916 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2918 for (i = 0; i < index; i++)
2919 mask[i] -= mask_nunits * *number_of_mask_fixes;
2921 (*number_of_mask_fixes)++;
2922 *mask_fixed = true;
2925 *need_next_vector = *mask_fixed;
2927 /* This was the last element of this mask. Start a new one. */
2928 if (index == mask_nunits - 1)
2930 *number_of_mask_fixes = 1;
2931 *mask_fixed = false;
2932 *needs_first_vector = false;
2935 return true;
2939 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2940 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2941 permute statements for the SLP node NODE of the SLP instance
2942 SLP_NODE_INSTANCE. */
2944 bool
2945 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2946 gimple_stmt_iterator *gsi, int vf,
2947 slp_instance slp_node_instance, bool analyze_only)
2949 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2950 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2951 tree mask_element_type = NULL_TREE, mask_type;
2952 int i, j, k, nunits, vec_index = 0, scalar_index;
2953 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2954 gimple next_scalar_stmt;
2955 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2956 int first_mask_element;
2957 int index, unroll_factor, current_mask_element, ncopies;
2958 unsigned char *mask;
2959 bool only_one_vec = false, need_next_vector = false;
2960 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2961 int number_of_mask_fixes = 1;
2962 bool mask_fixed = false;
2963 bool needs_first_vector = false;
2964 machine_mode mode;
2966 mode = TYPE_MODE (vectype);
2968 if (!can_vec_perm_p (mode, false, NULL))
2970 if (dump_enabled_p ())
2972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2973 "no vect permute for ");
2974 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2975 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2977 return false;
2980 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2981 same size as the vector element being permuted. */
2982 mask_element_type = lang_hooks.types.type_for_mode
2983 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2984 mask_type = get_vectype_for_scalar_type (mask_element_type);
2985 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2986 mask = XALLOCAVEC (unsigned char, nunits);
2987 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2989 /* The number of vector stmts to generate based only on the
2990 SLP_NODE_INSTANCE unrolling factor. */
2991 orig_vec_stmts_num = group_size *
2992 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2993 if (orig_vec_stmts_num == 1)
2994 only_one_vec = true;
2996 /* The number of copies is determined by the final vectorization factor
2997 relative to the SLP_NODE_INSTANCE unrolling factor. */
2998 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3000 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3001 return false;
3003 /* Generate permutation masks for every NODE. The number of masks for each
3004 NODE is equal to GROUP_SIZE.
3005 E.g., we have a group of three nodes with three loads from the same
3006 location in each node, and the vector size is 4. I.e., we have an
3007 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3008 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3009 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3012 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3013 The last mask is illegal since we assume two operands for the permute
3014 operation, so the mask element values must be below 2 * NUNITS.
3015 Hence, the last mask must be converted into {2,5,5,5}.
3016 For the first two permutations we need the first and the second input
3017 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3018 we need the second and the third vectors: {b1,c1,a2,b2} and
3019 {c2,a3,b3,c3}. */
3022 scalar_index = 0;
3023 index = 0;
3024 vect_stmts_counter = 0;
3025 vec_index = 0;
3026 first_vec_index = vec_index++;
3027 if (only_one_vec)
3028 second_vec_index = first_vec_index;
3029 else
3030 second_vec_index = vec_index++;
3032 for (j = 0; j < unroll_factor; j++)
3034 for (k = 0; k < group_size; k++)
3036 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3037 first_mask_element = i + j * group_size;
3038 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3039 nunits, only_one_vec, index,
3040 mask, &current_mask_element,
3041 &need_next_vector,
3042 &number_of_mask_fixes, &mask_fixed,
3043 &needs_first_vector))
3044 return false;
3045 gcc_assert (current_mask_element < 2 * nunits);
3046 mask[index++] = current_mask_element;
3048 if (index == nunits)
3050 index = 0;
3051 if (!can_vec_perm_p (mode, false, mask))
3053 if (dump_enabled_p ())
3055 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3056 vect_location,
3057 "unsupported vect permute { ");
3058 for (i = 0; i < nunits; ++i)
3059 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3060 mask[i]);
3061 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3063 return false;
3066 if (!analyze_only)
3068 int l;
3069 tree mask_vec, *mask_elts;
3070 mask_elts = XALLOCAVEC (tree, nunits);
3071 for (l = 0; l < nunits; ++l)
3072 mask_elts[l] = build_int_cst (mask_element_type,
3073 mask[l]);
3074 mask_vec = build_vector (mask_type, mask_elts);
3076 if (need_next_vector)
3078 first_vec_index = second_vec_index;
3079 second_vec_index = vec_index;
3082 next_scalar_stmt
3083 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3085 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3086 mask_vec, first_vec_index, second_vec_index,
3087 gsi, node, vectype, dr_chain,
3088 ncopies, vect_stmts_counter++);
3095 return true;
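
/* An illustrative aside (not part of the pass): the mask arithmetic for
   the interleaved example in the comment above, in standalone form.
   Each mask element starts as an absolute index into the flat load
   sequence (i + j * group_size) and is then re-expressed relative to
   the first input vector of its permutation so that it stays below
   2 * nunits.  toy_build_masks (0, 3, 4, 3) prints { 0 0 0 3 },
   { 3 3 6 6 } and { 2 5 5 5 }, the converted form of { 6 9 9 9 }.
   This is a simplification: the real vect_get_mask_element tracks the
   vector window statefully and rejects permutations that would need
   three input vectors.  */

#include <stdio.h>

static void
toy_build_masks (unsigned perm_index, unsigned group_size, unsigned nunits,
                 unsigned n_masks)
{
  unsigned mask[16];  /* Assumes nunits <= 16.  */
  unsigned j, k, l, lane = 0, m = 0;
  for (j = 0; m < n_masks; j++)
    for (k = 0; k < group_size && m < n_masks; k++)
      {
        mask[lane++] = perm_index + j * group_size;
        if (lane == nunits)
          {
            /* Renumber relative to the first input vector used.  */
            unsigned first_vec = mask[0] / nunits;
            printf ("mask %u: {", m);
            for (l = 0; l < nunits; l++)
              printf (" %u", mask[l] - first_vec * nunits);
            printf (" }\n");
            lane = 0;
            m++;
          }
      }
}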
3100 /* Vectorize SLP instance tree in postorder. */
3102 static bool
3103 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3104 unsigned int vectorization_factor)
3106 gimple stmt;
3107 bool grouped_store, is_store;
3108 gimple_stmt_iterator si;
3109 stmt_vec_info stmt_info;
3110 unsigned int vec_stmts_size, nunits, group_size;
3111 tree vectype;
3112 int i;
3113 slp_tree child;
3115 if (!node)
3116 return false;
3118 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3119 vect_schedule_slp_instance (child, instance, vectorization_factor);
3121 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3122 stmt_info = vinfo_for_stmt (stmt);
3124 /* VECTYPE is the type of the destination. */
3125 vectype = STMT_VINFO_VECTYPE (stmt_info);
3126 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3127 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3129 /* For each SLP instance calculate the number of vector stmts to be created
3130 for the scalar stmts in each node of the SLP tree. The number of scalar
3131 elements in one vector iteration is the number of scalar elements in
3132 one scalar iteration (GROUP_SIZE) multiplied by VF; dividing by the vector
3133 size (NUNITS) gives the number of vector stmts. */
3134 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3136 if (!SLP_TREE_VEC_STMTS (node).exists ())
3138 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3139 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3142 if (dump_enabled_p ())
3144 dump_printf_loc (MSG_NOTE,vect_location,
3145 "------>vectorizing SLP node starting from: ");
3146 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3147 dump_printf (MSG_NOTE, "\n");
3150 /* Loads should be inserted before the first load. */
3151 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3152 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3153 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3154 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3155 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3156 else if (is_pattern_stmt_p (stmt_info))
3157 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3158 else
3159 si = gsi_for_stmt (stmt);
3161 /* Stores should be inserted just before the last store. */
3162 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3163 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3165 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3166 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3167 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3168 si = gsi_for_stmt (last_store);
3171 /* Mark the first element of the reduction chain as reduction to properly
3172 transform the node. In the analysis phase only the last element of the
3173 chain is marked as reduction. */
3174 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3175 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3177 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3178 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3181 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3182 return is_store;
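
/* An illustrative aside (not part of the pass): the VEC_STMTS_SIZE
   formula above.  One scalar iteration produces GROUP_SIZE scalars,
   VF iterations produce VF * GROUP_SIZE, and each vector statement
   covers NUNITS of them; e.g. vf = 4, group_size = 2, nunits = 4
   yields 2 vector stmts.  */

static unsigned
toy_vec_stmts_size (unsigned vf, unsigned group_size, unsigned nunits)
{
  return vf * group_size / nunits;
}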
3185 /* Replace the scalar calls in SLP node NODE by setting their lhs to zero.
3186 For loop vectorization this is done in vectorizable_call, but for SLP
3187 it needs to be deferred until the end of vect_schedule_slp, because multiple
3188 SLP instances may refer to the same scalar stmt. */
3190 static void
3191 vect_remove_slp_scalar_calls (slp_tree node)
3193 gimple stmt, new_stmt;
3194 gimple_stmt_iterator gsi;
3195 int i;
3196 slp_tree child;
3197 tree lhs;
3198 stmt_vec_info stmt_info;
3200 if (!node)
3201 return;
3203 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3204 vect_remove_slp_scalar_calls (child);
3206 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3208 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3209 continue;
3210 stmt_info = vinfo_for_stmt (stmt);
3211 if (stmt_info == NULL
3212 || is_pattern_stmt_p (stmt_info)
3213 || !PURE_SLP_STMT (stmt_info))
3214 continue;
3215 lhs = gimple_call_lhs (stmt);
3216 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3217 set_vinfo_for_stmt (new_stmt, stmt_info);
3218 set_vinfo_for_stmt (stmt, NULL);
3219 STMT_VINFO_STMT (stmt_info) = new_stmt;
3220 gsi = gsi_for_stmt (stmt);
3221 gsi_replace (&gsi, new_stmt, false);
3222 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3226 /* Generate vector code for all SLP instances in the loop/basic block. */
3228 bool
3229 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3231 vec<slp_instance> slp_instances;
3232 slp_instance instance;
3233 unsigned int i, vf;
3234 bool is_store = false;
3236 if (loop_vinfo)
3238 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3239 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3241 else
3243 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3244 vf = 1;
3247 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3249 /* Schedule the tree of INSTANCE. */
3250 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3251 instance, vf);
3252 if (dump_enabled_p ())
3253 dump_printf_loc (MSG_NOTE, vect_location,
3254 "vectorizing stmts using SLP.\n");
3257 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3259 slp_tree root = SLP_INSTANCE_TREE (instance);
3260 gimple store;
3261 unsigned int j;
3262 gimple_stmt_iterator gsi;
3264 /* Remove scalar call stmts. Do not do this for basic-block
3265 vectorization as not all uses may be vectorized.
3266 ??? Why should this be necessary? DCE should be able to
3267 remove the stmts itself.
3268 ??? For BB vectorization we can as well remove scalar
3269 stmts starting from the SLP tree root if they have no
3270 uses. */
3271 if (loop_vinfo)
3272 vect_remove_slp_scalar_calls (root);
3274 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3275 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3277 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3278 break;
3280 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3281 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3282 /* Free the attached stmt_vec_info and remove the stmt. */
3283 gsi = gsi_for_stmt (store);
3284 unlink_stmt_vdef (store);
3285 gsi_remove (&gsi, true);
3286 release_defs (store);
3287 free_stmt_vec_info (store);
3291 return is_store;
3295 /* Vectorize the basic block. */
3297 void
3298 vect_slp_transform_bb (basic_block bb)
3300 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3301 gimple_stmt_iterator si;
3303 gcc_assert (bb_vinfo);
3305 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3308 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3310 gimple stmt = gsi_stmt (si);
3311 stmt_vec_info stmt_info;
3313 if (dump_enabled_p ())
3315 dump_printf_loc (MSG_NOTE, vect_location,
3316 "------>SLPing statement: ");
3317 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3318 dump_printf (MSG_NOTE, "\n");
3321 stmt_info = vinfo_for_stmt (stmt);
3322 gcc_assert (stmt_info);
3324 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3325 if (STMT_SLP_TYPE (stmt_info))
3327 vect_schedule_slp (NULL, bb_vinfo);
3328 break;
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_NOTE, vect_location,
3334 "BASIC BLOCK VECTORIZED\n");
3336 destroy_bb_vec_info (bb_vinfo);