/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}

/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
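
/* For example, for the group { a0 = b0 + x;  a1 = b1 + x; } both statements
   must see an internal def for operand 0 and the same def type (with a
   compatible type) for the shared operand X.  */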

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not memory operation (we don't call this function for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                   dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  Allow different definition
                 types for reduction chains: the first stmt must be a
                 vect_reduction_def (a phi node), and the rest
                 vect_internal_def.  */
              if ((i == 0
                   && ((*first_stmt_dt0 != dt[i]
                        && !(*first_stmt_dt0 == vect_reduction_def
                             && dt[i] == vect_internal_def))
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && ((*first_stmt_dt1 != dt[i]
                           && !(*first_stmt_dt1 == vect_reduction_def
                                && dt[i] == vect_internal_def))
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
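          /* E.g., for { a0 << s, a1 << s, a2 << s, a3 << s } a vector/scalar
             shift needs the same scalar amount S in every statement, so
             NEED_SAME_OPRNDS is set and S is remembered in FIRST_OP1.  */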
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Non-strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we keep them in
             LOADS for further check in vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
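
/* Dump the SLP tree rooted at NODE to the vectorizer dump file.  */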
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
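  /* For example, with GROUP_SIZE == 2 the only permutations accepted here
     are {0, 0, 1, 1} and {1, 1, 0, 0}: each vector is built from GROUP_SIZE
     copies of a single load, and each load is used exactly once.  */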
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
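
  /* For example, for V4SI (NUNITS == 4) and a group of six stores,
     least_common_multiple (4, 6) == 12, so the group must be unrolled
     12 / 6 == 2 times to fill whole vectors.  */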

  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
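
  /* E.g., two scalar stmts packed into V4SI vectors (GROUP_SIZE == 2,
     NUNITS == 4) give an unrolling factor of 2 and hence 2 * 2 / 4 == 1
     vector stmt, while a group of eight gives 1 * 8 / 4 == 2.  */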

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
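
/* For example, if the result of an SLP statement is also used by a reduction
   that is vectorized with loop-based vectorization, the statement must be
   vectorized by both techniques, i.e., marked as HYBRID.  */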

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if loads and stores are mixed in the basic block (in that
   case if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE if there is a statement marked as not
   vectorizable.  */
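
/* For example, in  *p = x;  y = *q;  the load from *q must not be executed
   before the store to *p unless dependence analysis proved that the two
   accesses differ, so a load that follows a store makes the block
   unvectorizable here.  */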

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow unanalyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
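  /* For example, replacing four scalar stores of cost 1 each (SCALAR_COST
     == 4) by a single vector store of cost 1 passes this test.  */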
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  current_vector_size = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
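
/* For example, with VF == 8 and an SLP unrolling factor of 4, twice as many
   vector stmts are created as assumed during the analysis, so each recorded
   inside cost is multiplied by 8 / 4 == 2.  */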

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
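      /* E.g., 0 is the neutral operand of PLUS_EXPR: padding the additional
         copies with zeros does not change the result of the reduction.  */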
1917 switch (code)
1919 case WIDEN_SUM_EXPR:
1920 case DOT_PROD_EXPR:
1921 case PLUS_EXPR:
1922 case MINUS_EXPR:
1923 case BIT_IOR_EXPR:
1924 case BIT_XOR_EXPR:
1925 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1926 neutral_op = build_real (TREE_TYPE (op), dconst0);
1927 else
1928 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1930 break;
1932 case MULT_EXPR:
1933 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1934 neutral_op = build_real (TREE_TYPE (op), dconst1);
1935 else
1936 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1938 break;
1940 case BIT_AND_EXPR:
1941 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1942 break;
1944 default:
1945 neutral_op = NULL;
1949 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1951 is_store = true;
1952 op = gimple_assign_rhs1 (stmt);
1954 else
1955 is_store = false;
1957 gcc_assert (op);
1959 if (CONSTANT_CLASS_P (op))
1960 constant_p = true;
1961 else
1962 constant_p = false;
1964 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1965 gcc_assert (vector_type);
1966 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
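  /* Worked numbers for the two examples above (illustration only):
     nunits = 4, group_size = 2: lcm (4, 2) / 2 = 4 / 2 = 2 copies;
     nunits = 4, group_size = 8: lcm (4, 8) / 8 = 8 / 8 = 1 copy, spread
     over 8 / 4 = 2 vectors.  */
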
  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst,
                                                vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);
  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
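  /* Illustration (hypothetical numbers): with GROUP_SIZE == 2, NUNITS == 4
     and VF == 8, the loop above builds a single vector {s1, s2, s1, s2},
     while NUMBER_OF_VECTORS == VF * GROUP_SIZE / NUNITS == 4, so the loop
     below pushes that vector (or the neutral vector, for reductions) three
     more times.  */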
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}

/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}

/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized
   stmts must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain
   scalar.  */
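/* Illustrative use (a sketch, not code from this file; the actual callers
   live in the stmt-transformation code): for a two-operand SLP node one
   might write

     VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
     vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1, -1);

   where -1 for REDUC_INDEX means no reduction is involved, and passing NULL
   instead of &vec_oprnds1 skips the second operand, as described above.  */
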
void
vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
                   VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance (), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
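      /* For instance (hypothetical types): for a widening operation whose
         RHS is a char (1 byte) and whose LHS is an int (4 bytes), the
         scaling below divides the LHS-based count by 4, since four times as
         many char elements fit in one vector.  */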
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors (), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
                               -1);
}

/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed
   in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE,
   where the created stmts must be inserted.  */
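/* Stride illustration (hypothetical numbers): if NODE has 4 vectorized stmts
   and NCOPIES == 2, then STRIDE == 2; copy 0 permutes DR_CHAIN[first] and
   DR_CHAIN[second], copy 1 permutes DR_CHAIN[first + 2] and
   DR_CHAIN[second + 2].  */
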
static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}

/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */
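/* Example of the conversion (hypothetical numbers): with MASK_NUNITS == 4,
   FIRST_MASK_ELEMENT == 6 and M == 1, the raw element is 7; since 7 >= 4 the
   mask also reads from the second input vector, and if only one input vector
   exists the permutation is rejected below.  */
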
static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */
bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance,
                              bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num,
      vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
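  /* E.g. (hypothetical target): for V4SI loads with a byte-based permute
     mask of type V16QI, MASK_NUNITS == 16 and NUNITS == 4, so SCALE == 4
     mask bytes describe one vector element.  */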
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
    SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relatively to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
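  /* Illustration (hypothetical numbers): GROUP_SIZE == 2, NUNITS == 4 and an
     SLP unrolling factor of 2 give ORIG_VEC_STMTS_NUM == 2 * 2 / 4 == 1, so
     ONLY_ONE_VEC holds; with a final VF of 8 we get NCOPIES == 8 / 2 == 4.  */
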
  /* Generate permutation masks for every NODE.  Number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
     specific type, e.g., in bytes for AltiVec).
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                   mask_nunits, only_one_vec, index, mask,
                                   &current_mask_element, &need_next_vector,
                                   &number_of_mask_fixes, &mask_fixed,
                                   &needs_first_vector))
                    {
                      /* Release the mask buffer on this early exit as
                         well.  */
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;

                  if (index == mask_nunits)
                    {
                      tree mask_vec = NULL;

                      while (--index >= 0)
                        {
                          tree t = build_int_cst (mask_element_type,
                                                  mask[index]);
                          mask_vec = tree_cons (NULL, t, mask_vec);
                        }
                      mask_vec = build_vector (mask_type, mask_vec);
                      index = 0;

                      if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                                  mask_vec))
                        {
                          if (vect_print_dump_info (REPORT_DETAILS))
                            {
                              fprintf (vect_dump, "unsupported vect permute ");
                              print_generic_expr (vect_dump, mask_vec, 0);
                            }
                          free (mask);
                          return false;
                        }

                      if (!analyze_only)
                        {
                          if (need_next_vector)
                            {
                              first_vec_index = second_vec_index;
                              second_vec_index = vec_index;
                            }

                          next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                          vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                   mask_vec, first_vec_index, second_vec_index,
                                   gsi, node, builtin_decl, vectype, dr_chain,
                                   ncopies, vect_stmts_counter++);
                        }
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
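  /* For instance (hypothetical numbers): with VF == 4, GROUP_SIZE == 2 and
     NUNITS == 4, each node gets (4 * 2) / 4 == 2 vector stmts.  */
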
  /* In case of load permutation we have to allocate vectorized statements
     for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i,
                        loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}

/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}