Fix a bug that broke -freorder-functions
[official-gcc.git] / gcc / tree-vect-slp.c
blob60bc475c75ab435cfe666fb2282cb7919ff2b4bb
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
45 LOC
46 find_bb_location (basic_block bb)
48 gimple stmt = NULL;
49 gimple_stmt_iterator si;
51 if (!bb)
52 return UNKNOWN_LOC;
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 stmt = gsi_stmt (si);
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
61 return UNKNOWN_LOC;
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 static void
68 vect_free_slp_tree (slp_tree node)
70 if (!node)
71 return;
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
84 free (node);
88 /* Free the memory allocated for the SLP instance. */
90 void
91 vect_free_slp_instance (slp_instance instance)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
103 static bool
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
116 tree oprnd;
117 unsigned int i, number_of_oprnds;
118 tree def;
119 gimple def_stmt;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
126 if (loop_vinfo)
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 &dt[i])
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
146 return false;
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
151 pattern. */
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
156 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
157 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
159 if (!*first_stmt_dt0)
160 *pattern0 = true;
161 else
163 if (i == 1 && !*first_stmt_dt1)
164 *pattern1 = true;
165 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
167 if (vect_print_dump_info (REPORT_DETAILS))
169 fprintf (vect_dump, "Build SLP failed: some of the stmts"
170 " are in a pattern, and others are not ");
171 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
174 return false;
178 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
179 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
181 if (*dt == vect_unknown_def_type)
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "Unsupported pattern.");
185 return false;
188 switch (gimple_code (def_stmt))
190 case GIMPLE_PHI:
191 def = gimple_phi_result (def_stmt);
192 break;
194 case GIMPLE_ASSIGN:
195 def = gimple_assign_lhs (def_stmt);
196 break;
198 default:
199 if (vect_print_dump_info (REPORT_DETAILS))
200 fprintf (vect_dump, "unsupported defining stmt: ");
201 return false;
205 if (!*first_stmt_dt0)
207 /* op0 of the first stmt of the group - store its info. */
208 *first_stmt_dt0 = dt[i];
209 if (def)
210 *first_stmt_def0_type = TREE_TYPE (def);
211 else
212 *first_stmt_const_oprnd = oprnd;
214 /* Analyze costs (for the first stmt of the group only). */
215 if (rhs_class != GIMPLE_SINGLE_RHS)
216 /* Not memory operation (we don't call this functions for loads). */
217 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
218 else
219 /* Store. */
220 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
221 dt[0], slp_node);
224 else
226 if (!*first_stmt_dt1 && i == 1)
228 /* op1 of the first stmt of the group - store its info. */
229 *first_stmt_dt1 = dt[i];
230 if (def)
231 *first_stmt_def1_type = TREE_TYPE (def);
232 else
234 /* We assume that the stmt contains only one constant
235 operand. We fail otherwise, to be on the safe side. */
236 if (*first_stmt_const_oprnd)
238 if (vect_print_dump_info (REPORT_SLP))
239 fprintf (vect_dump, "Build SLP failed: two constant "
240 "oprnds in stmt");
241 return false;
243 *first_stmt_const_oprnd = oprnd;
246 else
248 /* Not first stmt of the group, check that the def-stmt/s match
249 the def-stmt/s of the first stmt. Allow different definition
250 types for reduction chains: the first stmt must be a
251 vect_reduction_def (a phi node), and the rest
252 vect_internal_def. */
253 if ((i == 0
254 && ((*first_stmt_dt0 != dt[i]
255 && !(*first_stmt_dt0 == vect_reduction_def
256 && dt[i] == vect_internal_def))
257 || (*first_stmt_def0_type && def
258 && !types_compatible_p (*first_stmt_def0_type,
259 TREE_TYPE (def)))))
260 || (i == 1
261 && ((*first_stmt_dt1 != dt[i]
262 && !(*first_stmt_dt1 == vect_reduction_def
263 && dt[i] == vect_internal_def))
264 || (*first_stmt_def1_type && def
265 && !types_compatible_p (*first_stmt_def1_type,
266 TREE_TYPE (def)))))
267 || (!def
268 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
269 TREE_TYPE (oprnd))))
271 if (vect_print_dump_info (REPORT_SLP))
272 fprintf (vect_dump, "Build SLP failed: different types ");
274 return false;
279 /* Check the types of the definitions. */
280 switch (dt[i])
282 case vect_constant_def:
283 case vect_external_def:
284 break;
286 case vect_internal_def:
287 case vect_reduction_def:
288 if (i == 0)
289 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
290 else
291 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
292 break;
294 default:
295 /* FORNOW: Not supported. */
296 if (vect_print_dump_info (REPORT_SLP))
298 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
299 print_generic_expr (vect_dump, def, TDF_SLIM);
302 return false;
306 return true;
310 /* Recursively build an SLP tree starting from NODE.
311 Fail (and return FALSE) if def-stmts are not isomorphic, require data
312 permutation or are of unsupported types of operation. Otherwise, return
313 TRUE. */
315 static bool
316 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
317 slp_tree *node, unsigned int group_size,
318 int *inside_cost, int *outside_cost,
319 int ncopies_for_cost, unsigned int *max_nunits,
320 VEC (int, heap) **load_permutation,
321 VEC (slp_tree, heap) **loads,
322 unsigned int vectorization_factor)
324 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
325 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
326 unsigned int i;
327 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
328 gimple stmt = VEC_index (gimple, stmts, 0);
329 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
330 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
331 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
332 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
333 tree lhs;
334 bool stop_recursion = false, need_same_oprnds = false;
335 tree vectype, scalar_type, first_op1 = NULL_TREE;
336 unsigned int ncopies;
337 optab optab;
338 int icode;
339 enum machine_mode optab_op2_mode;
340 enum machine_mode vec_mode;
341 tree first_stmt_const_oprnd = NULL_TREE;
342 struct data_reference *first_dr;
343 bool pattern0 = false, pattern1 = false;
344 HOST_WIDE_INT dummy;
345 bool permutation = false;
346 unsigned int load_place;
347 gimple first_load, prev_first_load = NULL;
349 /* For every stmt in NODE find its def stmt/s. */
350 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
352 if (vect_print_dump_info (REPORT_SLP))
354 fprintf (vect_dump, "Build SLP for ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
358 /* Fail to vectorize statements marked as unvectorizable. */
359 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
361 if (vect_print_dump_info (REPORT_SLP))
363 fprintf (vect_dump,
364 "Build SLP failed: unvectorizable statement ");
365 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
368 return false;
371 lhs = gimple_get_lhs (stmt);
372 if (lhs == NULL_TREE)
374 if (vect_print_dump_info (REPORT_SLP))
376 fprintf (vect_dump,
377 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
378 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
381 return false;
384 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
385 vectype = get_vectype_for_scalar_type (scalar_type);
386 if (!vectype)
388 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
391 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
393 return false;
396 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
397 if (ncopies != 1)
399 if (vect_print_dump_info (REPORT_SLP))
400 fprintf (vect_dump, "SLP with multiple types ");
402 /* FORNOW: multiple types are unsupported in BB SLP. */
403 if (bb_vinfo)
404 return false;
407 /* In case of multiple types we need to detect the smallest type. */
408 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
409 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
411 if (is_gimple_call (stmt))
412 rhs_code = CALL_EXPR;
413 else
414 rhs_code = gimple_assign_rhs_code (stmt);
416 /* Check the operation. */
417 if (i == 0)
419 first_stmt_code = rhs_code;
421 /* Shift arguments should be equal in all the packed stmts for a
422 vector shift with scalar shift operand. */
423 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
424 || rhs_code == LROTATE_EXPR
425 || rhs_code == RROTATE_EXPR)
427 vec_mode = TYPE_MODE (vectype);
429 /* First see if we have a vector/vector shift. */
430 optab = optab_for_tree_code (rhs_code, vectype,
431 optab_vector);
433 if (!optab
434 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
436 /* No vector/vector shift, try for a vector/scalar shift. */
437 optab = optab_for_tree_code (rhs_code, vectype,
438 optab_scalar);
440 if (!optab)
442 if (vect_print_dump_info (REPORT_SLP))
443 fprintf (vect_dump, "Build SLP failed: no optab.");
444 return false;
446 icode = (int) optab_handler (optab, vec_mode);
447 if (icode == CODE_FOR_nothing)
449 if (vect_print_dump_info (REPORT_SLP))
450 fprintf (vect_dump, "Build SLP failed: "
451 "op not supported by target.");
452 return false;
454 optab_op2_mode = insn_data[icode].operand[2].mode;
455 if (!VECTOR_MODE_P (optab_op2_mode))
457 need_same_oprnds = true;
458 first_op1 = gimple_assign_rhs2 (stmt);
463 else
465 if (first_stmt_code != rhs_code
466 && (first_stmt_code != IMAGPART_EXPR
467 || rhs_code != REALPART_EXPR)
468 && (first_stmt_code != REALPART_EXPR
469 || rhs_code != IMAGPART_EXPR)
470 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
471 && (first_stmt_code == ARRAY_REF
472 || first_stmt_code == INDIRECT_REF
473 || first_stmt_code == COMPONENT_REF
474 || first_stmt_code == MEM_REF)))
476 if (vect_print_dump_info (REPORT_SLP))
478 fprintf (vect_dump,
479 "Build SLP failed: different operation in stmt ");
480 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
483 return false;
486 if (need_same_oprnds
487 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
489 if (vect_print_dump_info (REPORT_SLP))
491 fprintf (vect_dump,
492 "Build SLP failed: different shift arguments in ");
493 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
496 return false;
500 /* Strided store or load. */
501 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
503 if (REFERENCE_CLASS_P (lhs))
505 /* Store. */
506 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
507 stmt, &def_stmts0, &def_stmts1,
508 &first_stmt_dt0,
509 &first_stmt_dt1,
510 &first_stmt_def0_type,
511 &first_stmt_def1_type,
512 &first_stmt_const_oprnd,
513 ncopies_for_cost,
514 &pattern0, &pattern1))
515 return false;
517 else
519 /* Load. */
520 /* FORNOW: Check that there is no gap between the loads. */
521 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
522 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
523 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
524 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
526 if (vect_print_dump_info (REPORT_SLP))
528 fprintf (vect_dump, "Build SLP failed: strided "
529 "loads have gaps ");
530 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
533 return false;
536 /* Check that the size of interleaved loads group is not
537 greater than the SLP group size. */
538 if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
540 if (vect_print_dump_info (REPORT_SLP))
542 fprintf (vect_dump, "Build SLP failed: the number of "
543 "interleaved loads is greater than"
544 " the SLP group size ");
545 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
548 return false;
551 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
552 if (prev_first_load)
554 /* Check that there are no loads from different interleaving
555 chains in the same node. The only exception is complex
556 numbers. */
557 if (prev_first_load != first_load
558 && rhs_code != REALPART_EXPR
559 && rhs_code != IMAGPART_EXPR)
561 if (vect_print_dump_info (REPORT_SLP))
563 fprintf (vect_dump, "Build SLP failed: different "
564 "interleaving chains in one node ");
565 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
568 return false;
571 else
572 prev_first_load = first_load;
574 if (first_load == stmt)
576 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
577 if (vect_supportable_dr_alignment (first_dr, false)
578 == dr_unaligned_unsupported)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: unsupported "
583 "unaligned load ");
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
587 return false;
590 /* Analyze costs (for the first stmt in the group). */
591 vect_model_load_cost (vinfo_for_stmt (stmt),
592 ncopies_for_cost, false, *node);
595 /* Store the place of this load in the interleaving chain. In
596 case that permutation is needed we later decide if a specific
597 permutation is supported. */
598 load_place = vect_get_place_in_interleaving_chain (stmt,
599 first_load);
600 if (load_place != i)
601 permutation = true;
603 VEC_safe_push (int, heap, *load_permutation, load_place);
605 /* We stop the tree when we reach a group of loads. */
606 stop_recursion = true;
607 continue;
609 } /* Strided access. */
610 else
612 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
614 /* Not strided load. */
615 if (vect_print_dump_info (REPORT_SLP))
617 fprintf (vect_dump, "Build SLP failed: not strided load ");
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 /* FORNOW: Not strided loads are not supported. */
622 return false;
625 /* Not memory operation. */
626 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
627 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
629 if (vect_print_dump_info (REPORT_SLP))
631 fprintf (vect_dump, "Build SLP failed: operation");
632 fprintf (vect_dump, " unsupported ");
633 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
636 return false;
639 /* Find the def-stmts. */
640 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
641 &def_stmts0, &def_stmts1,
642 &first_stmt_dt0, &first_stmt_dt1,
643 &first_stmt_def0_type,
644 &first_stmt_def1_type,
645 &first_stmt_const_oprnd,
646 ncopies_for_cost,
647 &pattern0, &pattern1))
648 return false;
652 /* Add the costs of the node to the overall instance costs. */
653 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
654 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
656 /* Strided loads were reached - stop the recursion. */
657 if (stop_recursion)
659 if (permutation)
661 VEC_safe_push (slp_tree, heap, *loads, *node);
662 *inside_cost
663 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
664 * group_size;
666 else
668 /* We don't check here complex numbers chains, so we keep them in
669 LOADS for further check in vect_supported_load_permutation_p. */
670 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
671 VEC_safe_push (slp_tree, heap, *loads, *node);
674 return true;
677 /* Create SLP_TREE nodes for the definition node/s. */
678 if (first_stmt_dt0 == vect_internal_def)
680 slp_tree left_node = XNEW (struct _slp_tree);
681 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
682 SLP_TREE_VEC_STMTS (left_node) = NULL;
683 SLP_TREE_LEFT (left_node) = NULL;
684 SLP_TREE_RIGHT (left_node) = NULL;
685 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
686 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
687 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
688 inside_cost, outside_cost, ncopies_for_cost,
689 max_nunits, load_permutation, loads,
690 vectorization_factor))
691 return false;
693 SLP_TREE_LEFT (*node) = left_node;
696 if (first_stmt_dt1 == vect_internal_def)
698 slp_tree right_node = XNEW (struct _slp_tree);
699 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
700 SLP_TREE_VEC_STMTS (right_node) = NULL;
701 SLP_TREE_LEFT (right_node) = NULL;
702 SLP_TREE_RIGHT (right_node) = NULL;
703 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
704 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
705 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
706 inside_cost, outside_cost, ncopies_for_cost,
707 max_nunits, load_permutation, loads,
708 vectorization_factor))
709 return false;
711 SLP_TREE_RIGHT (*node) = right_node;
714 return true;
718 static void
719 vect_print_slp_tree (slp_tree node)
721 int i;
722 gimple stmt;
724 if (!node)
725 return;
727 fprintf (vect_dump, "node ");
728 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
730 fprintf (vect_dump, "\n\tstmt %d ", i);
731 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
733 fprintf (vect_dump, "\n");
735 vect_print_slp_tree (SLP_TREE_LEFT (node));
736 vect_print_slp_tree (SLP_TREE_RIGHT (node));
740 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
741 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
742 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
743 stmts in NODE are to be marked. */
745 static void
746 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
748 int i;
749 gimple stmt;
751 if (!node)
752 return;
754 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
755 if (j < 0 || i == j)
756 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
758 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
759 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
763 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
765 static void
766 vect_mark_slp_stmts_relevant (slp_tree node)
768 int i;
769 gimple stmt;
770 stmt_vec_info stmt_info;
772 if (!node)
773 return;
775 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
777 stmt_info = vinfo_for_stmt (stmt);
778 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
779 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
780 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
783 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
784 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
788 /* Check if the permutation required by the SLP INSTANCE is supported.
789 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
791 static bool
792 vect_supported_slp_permutation_p (slp_instance instance)
794 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
795 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
796 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
797 VEC (slp_tree, heap) *sorted_loads = NULL;
798 int index;
799 slp_tree *tmp_loads = NULL;
800 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
801 slp_tree load;
803 /* FORNOW: The only supported loads permutation is loads from the same
804 location in all the loads in the node, when the data-refs in
805 nodes of LOADS constitute an interleaving chain.
806 Sort the nodes according to the order of accesses in the chain. */
807 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
808 for (i = 0, j = 0;
809 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
810 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
811 i += group_size, j++)
813 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
814 /* Check that the loads are all in the same interleaving chain. */
815 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
817 if (vect_print_dump_info (REPORT_DETAILS))
819 fprintf (vect_dump, "Build SLP failed: unsupported data "
820 "permutation ");
821 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
824 free (tmp_loads);
825 return false;
828 tmp_loads[index] = load;
831 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
832 for (i = 0; i < group_size; i++)
833 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
835 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
836 SLP_INSTANCE_LOADS (instance) = sorted_loads;
837 free (tmp_loads);
839 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
840 SLP_INSTANCE_UNROLLING_FACTOR (instance),
841 instance, true))
842 return false;
844 return true;
848 /* Rearrange the statements of NODE according to PERMUTATION. */
850 static void
851 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
852 VEC (int, heap) *permutation)
854 gimple stmt;
855 VEC (gimple, heap) *tmp_stmts;
856 unsigned int index, i;
858 if (!node)
859 return;
861 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
862 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
864 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
865 tmp_stmts = VEC_alloc (gimple, heap, group_size);
867 for (i = 0; i < group_size; i++)
868 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
870 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
872 index = VEC_index (int, permutation, i);
873 VEC_replace (gimple, tmp_stmts, index, stmt);
876 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
877 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
881 /* Check if the required load permutation is supported.
882 LOAD_PERMUTATION contains a list of indices of the loads.
883 In SLP this permutation is relative to the order of strided stores that are
884 the base of the SLP instance. */
886 static bool
887 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
888 VEC (int, heap) *load_permutation)
890 int i = 0, j, prev = -1, next, k, number_of_groups;
891 bool supported, bad_permutation = false;
892 sbitmap load_index;
893 slp_tree node, other_complex_node;
894 gimple stmt, first = NULL, other_node_first;
895 unsigned complex_numbers = 0;
897 /* FORNOW: permutations are only supported in SLP. */
898 if (!slp_instn)
899 return false;
901 if (vect_print_dump_info (REPORT_SLP))
903 fprintf (vect_dump, "Load permutation ");
904 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
905 fprintf (vect_dump, "%d ", next);
908 /* In case of reduction every load permutation is allowed, since the order
909 of the reduction statements is not important (as opposed to the case of
910 strided stores). The only condition we need to check is that all the
911 load nodes are of the same size and have the same permutation (and then
912 rearrange all the nodes of the SLP instance according to this
913 permutation). */
915 /* Check that all the load nodes are of the same size. */
916 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
918 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
919 != (unsigned) group_size)
920 return false;
922 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
923 if (is_gimple_assign (stmt)
924 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
925 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
926 complex_numbers++;
929 /* Complex operands can be swapped as following:
930 real_c = real_b + real_a;
931 imag_c = imag_a + imag_b;
932 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
933 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
934 chains are mixed, they match the above pattern. */
935 if (complex_numbers)
937 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
939 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
941 if (j == 0)
942 first = stmt;
943 else
945 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
947 if (complex_numbers != 2)
948 return false;
950 if (i == 0)
951 k = 1;
952 else
953 k = 0;
955 other_complex_node = VEC_index (slp_tree,
956 SLP_INSTANCE_LOADS (slp_instn), k);
957 other_node_first = VEC_index (gimple,
958 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
960 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
961 != other_node_first)
962 return false;
969 /* We checked that this case ok, so there is no need to proceed with
970 permutation tests. */
971 if (complex_numbers == 2)
973 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
974 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
975 return true;
978 node = SLP_INSTANCE_TREE (slp_instn);
979 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
980 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
981 instance, not all the loads belong to the same node or interleaving
982 group. Hence, we need to divide them into groups according to
983 GROUP_SIZE. */
984 number_of_groups = VEC_length (int, load_permutation) / group_size;
986 /* Reduction (there are no data-refs in the root).
987 In reduction chain the order of the loads is important. */
988 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
989 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
991 int first_group_load_index;
993 /* Compare all the permutation sequences to the first one. */
994 for (i = 1; i < number_of_groups; i++)
996 k = 0;
997 for (j = i * group_size; j < i * group_size + group_size; j++)
999 next = VEC_index (int, load_permutation, j);
1000 first_group_load_index = VEC_index (int, load_permutation, k);
1002 if (next != first_group_load_index)
1004 bad_permutation = true;
1005 break;
1008 k++;
1011 if (bad_permutation)
1012 break;
1015 if (!bad_permutation)
1017 /* Check that the loads in the first sequence are different and there
1018 are no gaps between them. */
1019 load_index = sbitmap_alloc (group_size);
1020 sbitmap_zero (load_index);
1021 for (k = 0; k < group_size; k++)
1023 first_group_load_index = VEC_index (int, load_permutation, k);
1024 if (TEST_BIT (load_index, first_group_load_index))
1026 bad_permutation = true;
1027 break;
1030 SET_BIT (load_index, first_group_load_index);
1033 if (!bad_permutation)
1034 for (k = 0; k < group_size; k++)
1035 if (!TEST_BIT (load_index, k))
1037 bad_permutation = true;
1038 break;
1041 sbitmap_free (load_index);
1044 if (!bad_permutation)
1046 /* This permutation is valid for reduction. Since the order of the
1047 statements in the nodes is not important unless they are memory
1048 accesses, we can rearrange the statements in all the nodes
1049 according to the order of the loads. */
1050 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1051 load_permutation);
1052 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1053 return true;
1057 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1058 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1059 well (unless it's reduction). */
1060 if (VEC_length (int, load_permutation)
1061 != (unsigned int) (group_size * group_size))
1062 return false;
1064 supported = true;
1065 load_index = sbitmap_alloc (group_size);
1066 sbitmap_zero (load_index);
1067 for (j = 0; j < group_size; j++)
1069 for (i = j * group_size, k = 0;
1070 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1071 i++, k++)
1073 if (i != j * group_size && next != prev)
1075 supported = false;
1076 break;
1079 prev = next;
1082 if (TEST_BIT (load_index, prev))
1084 supported = false;
1085 break;
1088 SET_BIT (load_index, prev);
1091 for (j = 0; j < group_size; j++)
1092 if (!TEST_BIT (load_index, j))
1093 return false;
1095 sbitmap_free (load_index);
1097 if (supported && i == group_size * group_size
1098 && vect_supported_slp_permutation_p (slp_instn))
1099 return true;
1101 return false;
1105 /* Find the first load in the loop that belongs to INSTANCE.
1106 When loads are in several SLP nodes, there can be a case in which the first
1107 load does not appear in the first SLP node to be transformed, causing
1108 incorrect order of statements. Since we generate all the loads together,
1109 they must be inserted before the first load of the SLP instance and not
1110 before the first load of the first node of the instance. */
1112 static gimple
1113 vect_find_first_load_in_slp_instance (slp_instance instance)
1115 int i, j;
1116 slp_tree load_node;
1117 gimple first_load = NULL, load;
1119 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1120 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1121 first_load = get_earlier_stmt (load, first_load);
1123 return first_load;
1127 /* Find the last store in SLP INSTANCE. */
1129 static gimple
1130 vect_find_last_store_in_slp_instance (slp_instance instance)
1132 int i;
1133 slp_tree node;
1134 gimple last_store = NULL, store;
1136 node = SLP_INSTANCE_TREE (instance);
1137 for (i = 0;
1138 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1139 i++)
1140 last_store = get_later_stmt (store, last_store);
1142 return last_store;
1146 /* Analyze an SLP instance starting from a group of strided stores. Call
1147 vect_build_slp_tree to build a tree of packed stmts if possible.
1148 Return FALSE if it's impossible to SLP any stmt in the loop. */
1150 static bool
1151 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1152 gimple stmt)
1154 slp_instance new_instance;
1155 slp_tree node = XNEW (struct _slp_tree);
1156 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1157 unsigned int unrolling_factor = 1, nunits;
1158 tree vectype, scalar_type = NULL_TREE;
1159 gimple next;
1160 unsigned int vectorization_factor = 0;
1161 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1162 unsigned int max_nunits = 0;
1163 VEC (int, heap) *load_permutation;
1164 VEC (slp_tree, heap) *loads;
1165 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1167 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1169 if (dr)
1171 scalar_type = TREE_TYPE (DR_REF (dr));
1172 vectype = get_vectype_for_scalar_type (scalar_type);
1174 else
1176 gcc_assert (loop_vinfo);
1177 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1180 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1182 else
1184 gcc_assert (loop_vinfo);
1185 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1186 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1189 if (!vectype)
1191 if (vect_print_dump_info (REPORT_SLP))
1193 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1194 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1197 return false;
1200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1201 if (loop_vinfo)
1202 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1203 else
1204 /* No multitypes in BB SLP. */
1205 vectorization_factor = nunits;
1207 /* Calculate the unrolling factor. */
1208 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1209 if (unrolling_factor != 1 && !loop_vinfo)
1211 if (vect_print_dump_info (REPORT_SLP))
1212 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1213 " block SLP");
1215 return false;
1218 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1219 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1220 next = stmt;
1221 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1223 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1224 while (next)
1226 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1227 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1230 else
1232 /* Collect reduction statements. */
1233 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1234 next);
1235 i++)
1236 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1239 SLP_TREE_VEC_STMTS (node) = NULL;
1240 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1241 SLP_TREE_LEFT (node) = NULL;
1242 SLP_TREE_RIGHT (node) = NULL;
1243 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1244 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1246 /* Calculate the number of vector stmts to create based on the unrolling
1247 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1248 GROUP_SIZE / NUNITS otherwise. */
1249 ncopies_for_cost = unrolling_factor * group_size / nunits;
1251 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1252 loads = VEC_alloc (slp_tree, heap, group_size);
1254 /* Build the tree for the SLP instance. */
1255 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1256 &inside_cost, &outside_cost, ncopies_for_cost,
1257 &max_nunits, &load_permutation, &loads,
1258 vectorization_factor))
1260 /* Create a new SLP instance. */
1261 new_instance = XNEW (struct _slp_instance);
1262 SLP_INSTANCE_TREE (new_instance) = node;
1263 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1264 /* Calculate the unrolling factor based on the smallest type in the
1265 loop. */
1266 if (max_nunits > nunits)
1267 unrolling_factor = least_common_multiple (max_nunits, group_size)
1268 / group_size;
1270 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1271 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1272 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1273 SLP_INSTANCE_LOADS (new_instance) = loads;
1274 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1275 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1276 if (VEC_length (slp_tree, loads))
1278 if (!vect_supported_load_permutation_p (new_instance, group_size,
1279 load_permutation))
1281 if (vect_print_dump_info (REPORT_SLP))
1283 fprintf (vect_dump, "Build SLP failed: unsupported load "
1284 "permutation ");
1285 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1288 vect_free_slp_instance (new_instance);
1289 return false;
1292 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1293 = vect_find_first_load_in_slp_instance (new_instance);
1295 else
1296 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1298 if (loop_vinfo)
1299 VEC_safe_push (slp_instance, heap,
1300 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1301 new_instance);
1302 else
1303 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1304 new_instance);
1306 if (vect_print_dump_info (REPORT_SLP))
1307 vect_print_slp_tree (node);
1309 return true;
1312 /* Failed to SLP. */
1313 /* Free the allocated memory. */
1314 vect_free_slp_tree (node);
1315 VEC_free (int, heap, load_permutation);
1316 VEC_free (slp_tree, heap, loads);
1318 return false;
1322 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1323 trees of packed scalar stmts if SLP is possible. */
1325 bool
1326 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1328 unsigned int i;
1329 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1330 gimple first_element;
1331 bool ok = false;
1333 if (vect_print_dump_info (REPORT_SLP))
1334 fprintf (vect_dump, "=== vect_analyze_slp ===");
1336 if (loop_vinfo)
1338 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1339 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1340 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1342 else
1343 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1345 /* Find SLP sequences starting from groups of strided stores. */
1346 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1347 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1348 ok = true;
1350 if (bb_vinfo && !ok)
1352 if (vect_print_dump_info (REPORT_SLP))
1353 fprintf (vect_dump, "Failed to SLP the basic block.");
1355 return false;
1358 if (loop_vinfo
1359 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1361 /* Find SLP sequences starting from reduction chains. */
1362 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1363 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1364 ok = true;
1365 else
1366 return false;
1368 /* Don't try to vectorize SLP reductions if reduction chain was
1369 detected. */
1370 return ok;
1373 /* Find SLP sequences starting from groups of reductions. */
1374 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1375 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1376 VEC_index (gimple, reductions, 0)))
1377 ok = true;
1379 return true;
1383 /* For each possible SLP instance decide whether to SLP it and calculate overall
1384 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1385 least one instance. */
1387 bool
1388 vect_make_slp_decision (loop_vec_info loop_vinfo)
1390 unsigned int i, unrolling_factor = 1;
1391 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1392 slp_instance instance;
1393 int decided_to_slp = 0;
1395 if (vect_print_dump_info (REPORT_SLP))
1396 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1398 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1400 /* FORNOW: SLP if you can. */
1401 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1402 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1404 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1405 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1406 loop-based vectorization. Such stmts will be marked as HYBRID. */
1407 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1408 decided_to_slp++;
1411 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1413 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1414 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1415 decided_to_slp, unrolling_factor);
1417 return (decided_to_slp > 0);
1421 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1422 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1424 static void
1425 vect_detect_hybrid_slp_stmts (slp_tree node)
1427 int i;
1428 gimple stmt;
1429 imm_use_iterator imm_iter;
1430 gimple use_stmt;
1431 stmt_vec_info stmt_vinfo;
1433 if (!node)
1434 return;
1436 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1437 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1438 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1439 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1440 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1441 && !STMT_SLP_TYPE (stmt_vinfo)
1442 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1443 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1444 && !(gimple_code (use_stmt) == GIMPLE_PHI
1445 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1446 == vect_reduction_def))
1447 vect_mark_slp_stmts (node, hybrid, i);
1449 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1450 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1454 /* Find stmts that must be both vectorized and SLPed. */
1456 void
1457 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1459 unsigned int i;
1460 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1461 slp_instance instance;
1463 if (vect_print_dump_info (REPORT_SLP))
1464 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1466 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1467 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1471 /* Create and initialize a new bb_vec_info struct for BB, as well as
1472 stmt_vec_info structs for all the stmts in it. */
1474 static bb_vec_info
1475 new_bb_vec_info (basic_block bb)
1477 bb_vec_info res = NULL;
1478 gimple_stmt_iterator gsi;
1480 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1481 BB_VINFO_BB (res) = bb;
1483 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1485 gimple stmt = gsi_stmt (gsi);
1486 gimple_set_uid (stmt, 0);
1487 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1490 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1491 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1493 bb->aux = res;
1494 return res;
1498 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1499 stmts in the basic block. */
1501 static void
1502 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1504 basic_block bb;
1505 gimple_stmt_iterator si;
1507 if (!bb_vinfo)
1508 return;
1510 bb = BB_VINFO_BB (bb_vinfo);
1512 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1514 gimple stmt = gsi_stmt (si);
1515 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1517 if (stmt_info)
1518 /* Free stmt_vec_info. */
1519 free_stmt_vec_info (stmt);
1522 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1523 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1524 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1525 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1526 free (bb_vinfo);
1527 bb->aux = NULL;
1531 /* Analyze statements contained in SLP tree node after recursively analyzing
1532 the subtree. Return TRUE if the operations are supported. */
1534 static bool
1535 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1537 bool dummy;
1538 int i;
1539 gimple stmt;
1541 if (!node)
1542 return true;
1544 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1545 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1546 return false;
1548 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1551 gcc_assert (stmt_info);
1552 gcc_assert (PURE_SLP_STMT (stmt_info));
1554 if (!vect_analyze_stmt (stmt, &dummy, node))
1555 return false;
1558 return true;
1562 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1563 operations are supported. */
1565 static bool
1566 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1568 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1569 slp_instance instance;
1570 int i;
1572 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1574 if (!vect_slp_analyze_node_operations (bb_vinfo,
1575 SLP_INSTANCE_TREE (instance)))
1577 vect_free_slp_instance (instance);
1578 VEC_ordered_remove (slp_instance, slp_instances, i);
1580 else
1581 i++;
1584 if (!VEC_length (slp_instance, slp_instances))
1585 return false;
1587 return true;
1590 /* Check if loads and stores are mixed in the basic block (in that
1591 case if we are not sure that the accesses differ, we can't vectorize the
1592 basic block). Also return FALSE in case that there is statement marked as
1593 not vectorizable. */
1595 static bool
1596 vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
1598 basic_block bb = BB_VINFO_BB (bb_vinfo);
1599 gimple_stmt_iterator si;
1600 bool detected_store = false;
1601 gimple stmt;
1602 struct data_reference *dr;
1604 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1606 stmt = gsi_stmt (si);
1608 /* We can't allow not analyzed statements, since they may contain data
1609 accesses. */
1610 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
1611 return false;
1613 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1614 continue;
1616 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1617 if (DR_IS_READ (dr) && detected_store)
1618 return false;
1620 if (!DR_IS_READ (dr))
1621 detected_store = true;
1624 return true;
1627 /* Check if vectorization of the basic block is profitable. */
1629 static bool
1630 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1632 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1633 slp_instance instance;
1634 int i;
1635 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1636 unsigned int stmt_cost;
1637 gimple stmt;
1638 gimple_stmt_iterator si;
1639 basic_block bb = BB_VINFO_BB (bb_vinfo);
1640 stmt_vec_info stmt_info = NULL;
1641 tree dummy_type = NULL;
1642 int dummy = 0;
1644 /* Calculate vector costs. */
1645 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1647 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1648 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1651 /* Calculate scalar cost. */
1652 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1654 stmt = gsi_stmt (si);
1655 stmt_info = vinfo_for_stmt (stmt);
1657 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1658 || !PURE_SLP_STMT (stmt_info))
1659 continue;
1661 if (STMT_VINFO_DATA_REF (stmt_info))
1663 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1664 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1665 (scalar_load, dummy_type, dummy);
1666 else
1667 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1668 (scalar_store, dummy_type, dummy);
1670 else
1671 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1672 (scalar_stmt, dummy_type, dummy);
1674 scalar_cost += stmt_cost;
1677 if (vect_print_dump_info (REPORT_COST))
1679 fprintf (vect_dump, "Cost model analysis: \n");
1680 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1681 vec_inside_cost);
1682 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1683 vec_outside_cost);
1684 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1687 /* Vectorization is profitable if its cost is less than the cost of scalar
1688 version. */
1689 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1690 return false;
1692 return true;
1695 /* Check if the basic block can be vectorized. */
1697 bb_vec_info
1698 vect_slp_analyze_bb (basic_block bb)
1700 bb_vec_info bb_vinfo;
1701 VEC (ddr_p, heap) *ddrs;
1702 VEC (slp_instance, heap) *slp_instances;
1703 slp_instance instance;
1704 int i, insns = 0;
1705 gimple_stmt_iterator gsi;
1706 int min_vf = 2;
1707 int max_vf = MAX_VECTORIZATION_FACTOR;
1708 bool data_dependence_in_bb = false;
1710 current_vector_size = 0;
1712 if (vect_print_dump_info (REPORT_DETAILS))
1713 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1715 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1717 gimple stmt = gsi_stmt (gsi);
1718 if (!is_gimple_debug (stmt)
1719 && !gimple_nop_p (stmt)
1720 && gimple_code (stmt) != GIMPLE_LABEL)
1721 insns++;
1724 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1726 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1727 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1728 "block.\n");
1730 return NULL;
1733 bb_vinfo = new_bb_vec_info (bb);
1734 if (!bb_vinfo)
1735 return NULL;
1737 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1740 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1741 "block.\n");
1743 destroy_bb_vec_info (bb_vinfo);
1744 return NULL;
1747 ddrs = BB_VINFO_DDRS (bb_vinfo);
1748 if (!VEC_length (ddr_p, ddrs))
1750 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1751 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1752 "block.\n");
1754 destroy_bb_vec_info (bb_vinfo);
1755 return NULL;
1758 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
1759 &data_dependence_in_bb)
1760 || min_vf > max_vf
1761 || (data_dependence_in_bb
1762 && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
1764 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1765 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1766 "in basic block.\n");
1768 destroy_bb_vec_info (bb_vinfo);
1769 return NULL;
1772 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1774 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1775 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1776 "block.\n");
1778 destroy_bb_vec_info (bb_vinfo);
1779 return NULL;
1782 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1784 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1785 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1786 "block.\n");
1788 destroy_bb_vec_info (bb_vinfo);
1789 return NULL;
1792 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1794 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1795 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1796 "block.\n");
1798 destroy_bb_vec_info (bb_vinfo);
1799 return NULL;
1802 /* Check the SLP opportunities in the basic block, analyze and build SLP
1803 trees. */
1804 if (!vect_analyze_slp (NULL, bb_vinfo))
1806 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1807 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1808 "in basic block.\n");
1810 destroy_bb_vec_info (bb_vinfo);
1811 return NULL;
1814 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1816 /* Mark all the statements that we want to vectorize as pure SLP and
1817 relevant. */
1818 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1820 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1821 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1824 if (!vect_slp_analyze_operations (bb_vinfo))
1826 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1827 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1829 destroy_bb_vec_info (bb_vinfo);
1830 return NULL;
1833 /* Cost model: check if the vectorization is worthwhile. */
1834 if (flag_vect_cost_model
1835 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1837 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1838 fprintf (vect_dump, "not vectorized: vectorization is not "
1839 "profitable.\n");
1841 destroy_bb_vec_info (bb_vinfo);
1842 return NULL;
1845 if (vect_print_dump_info (REPORT_DETAILS))
1846 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1848 return bb_vinfo;
1852 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1853 the number of created vector stmts depends on the unrolling factor).
1854 However, the actual number of vector stmts for every SLP node depends on
1855 VF which is set later in vect_analyze_operations (). Hence, SLP costs
1856 should be updated. In this function we assume that the inside costs
1857 calculated in vect_model_xxx_cost are linear in ncopies. */
1859 void
1860 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1862 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1863 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1864 slp_instance instance;
1866 if (vect_print_dump_info (REPORT_SLP))
1867 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1869 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1870 /* We assume that costs are linear in ncopies. */
1871 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1872 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1876 /* For constant and loop invariant defs of SLP_NODE this function returns
1877 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1878 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
1879 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1880 REDUC_INDEX is the index of the reduction operand in the statements, unless
1881 it is -1. */
1883 static void
1884 vect_get_constant_vectors (tree op, slp_tree slp_node,
1885 VEC (tree, heap) **vec_oprnds,
1886 unsigned int op_num, unsigned int number_of_vectors,
1887 int reduc_index)
1889 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1890 gimple stmt = VEC_index (gimple, stmts, 0);
1891 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1892 int nunits;
1893 tree vec_cst;
1894 tree t = NULL_TREE;
1895 int j, number_of_places_left_in_vector;
1896 tree vector_type;
1897 tree vop;
1898 int group_size = VEC_length (gimple, stmts);
1899 unsigned int vec_num, i;
1900 int number_of_copies = 1;
1901 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1902 bool constant_p, is_store;
1903 tree neutral_op = NULL;
1904 enum tree_code code = gimple_assign_rhs_code (stmt);
1906 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1908 if (reduc_index == -1)
1910 VEC_free (tree, heap, *vec_oprnds);
1911 return;
1914 op_num = reduc_index - 1;
1915 op = gimple_op (stmt, reduc_index);
1916 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1917 we need either neutral operands or the original operands. See
1918 get_initial_def_for_reduction() for details. */
1919 switch (code)
1921 case WIDEN_SUM_EXPR:
1922 case DOT_PROD_EXPR:
1923 case PLUS_EXPR:
1924 case MINUS_EXPR:
1925 case BIT_IOR_EXPR:
1926 case BIT_XOR_EXPR:
1927 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1928 neutral_op = build_real (TREE_TYPE (op), dconst0);
1929 else
1930 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1932 break;
1934 case MULT_EXPR:
1935 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1936 neutral_op = build_real (TREE_TYPE (op), dconst1);
1937 else
1938 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1940 break;
1942 case BIT_AND_EXPR:
1943 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1944 break;
1946 default:
1947 neutral_op = NULL;
1951 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1953 is_store = true;
1954 op = gimple_assign_rhs1 (stmt);
1956 else
1957 is_store = false;
1959 gcc_assert (op);
1961 if (CONSTANT_CLASS_P (op))
1962 constant_p = true;
1963 else
1964 constant_p = false;
1966 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1967 gcc_assert (vector_type);
1968 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1970 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1971 created vectors. It is greater than 1 if unrolling is performed.
1973 For example, we have two scalar operands, s1 and s2 (e.g., group of
1974 strided accesses of size two), while NUNITS is four (i.e., four scalars
1975 of this type can be packed in a vector). The output vector will contain
1976 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1977 will be 2).
1979 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1980 containing the operands.
1982 For example, NUNITS is four as before, and the group size is 8
1983 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1984 {s5, s6, s7, s8}. */
1986 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1988 number_of_places_left_in_vector = nunits;
1989 for (j = 0; j < number_of_copies; j++)
1991 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1993 if (is_store)
1994 op = gimple_assign_rhs1 (stmt);
1995 else
1996 op = gimple_op (stmt, op_num + 1);
1998 if (reduc_index != -1)
2000 struct loop *loop = (gimple_bb (stmt))->loop_father;
2001 gimple def_stmt = SSA_NAME_DEF_STMT (op);
2003 gcc_assert (loop);
2005 /* Get the def before the loop. In reduction chain we have only
2006 one initial value. */
2007 if ((j != (number_of_copies - 1)
2008 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2009 && i != 0))
2010 && neutral_op)
2011 op = neutral_op;
2012 else
2013 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2014 loop_preheader_edge (loop));
2017 /* Create 'vect_ = {op0,op1,...,opn}'. */
2018 t = tree_cons (NULL_TREE, op, t);
2020 number_of_places_left_in_vector--;
2022 if (number_of_places_left_in_vector == 0)
2024 number_of_places_left_in_vector = nunits;
2026 if (constant_p)
2027 vec_cst = build_vector (vector_type, t);
2028 else
2029 vec_cst = build_constructor_from_list (vector_type, t);
2030 VEC_quick_push (tree, voprnds,
2031 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2032 t = NULL_TREE;
2037 /* Since the vectors are created in the reverse order, we should invert
2038 them. */
2039 vec_num = VEC_length (tree, voprnds);
2040 for (j = vec_num - 1; j >= 0; j--)
2042 vop = VEC_index (tree, voprnds, j);
2043 VEC_quick_push (tree, *vec_oprnds, vop);
2046 VEC_free (tree, heap, voprnds);
2048 /* In case that VF is greater than the unrolling factor needed for the SLP
2049 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2050 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2051 to replicate the vectors. */
2052 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2054 tree neutral_vec = NULL;
2056 if (neutral_op)
2058 if (!neutral_vec)
2059 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2061 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2063 else
2065 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2066 VEC_quick_push (tree, *vec_oprnds, vop);
2072 /* Get vectorized definitions from SLP_NODE that contains corresponding
2073 vectorized def-stmts. */
2075 static void
2076 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2078 tree vec_oprnd;
2079 gimple vec_def_stmt;
2080 unsigned int i;
2082 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2084 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2086 gcc_assert (vec_def_stmt);
2087 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2088 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2093 /* Get vectorized definitions for SLP_NODE.
2094 If the scalar definitions are loop invariants or constants, collect them and
2095 call vect_get_constant_vectors() to create vector stmts.
2096 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2097 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
2098 vect_get_slp_vect_defs() to retrieve them.
2099 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
2100 the right node. This is used when the second operand must remain scalar. */
2102 void
2103 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2104 VEC (tree,heap) **vec_oprnds0,
2105 VEC (tree,heap) **vec_oprnds1, int reduc_index)
2107 gimple first_stmt;
2108 enum tree_code code;
2109 int number_of_vects;
2110 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2112 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2113 /* The number of vector defs is determined by the number of vector statements
2114 in the node from which we get those statements. */
2115 if (SLP_TREE_LEFT (slp_node))
2116 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
2117 else
2119 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2120 /* Number of vector stmts was calculated according to LHS in
2121 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
2122 necessary. See vect_get_smallest_scalar_type () for details. */
2123 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2124 &rhs_size_unit);
2125 if (rhs_size_unit != lhs_size_unit)
2127 number_of_vects *= rhs_size_unit;
2128 number_of_vects /= lhs_size_unit;
2132 /* Allocate memory for vectorized defs. */
2133 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
2135 /* SLP_NODE corresponds either to a group of stores or to a group of
2136 unary/binary operations. We don't call this function for loads.
2137 For reduction defs we call vect_get_constant_vectors(), since we are
2138 looking for initial loop invariant values. */
2139 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
2140 /* The defs are already vectorized. */
2141 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
2142 else
2143 /* Build vectors from scalar defs. */
2144 vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
2145 reduc_index);
2147 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
2148 /* Since we don't call this function with loads, this is a group of
2149 stores. */
2150 return;
2152 /* For reductions, we only need initial values. */
2153 if (reduc_index != -1)
2154 return;
2156 code = gimple_assign_rhs_code (first_stmt);
2157 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
2158 return;
2160 /* The number of vector defs is determined by the number of vector statements
2161 in the node from which we get those statements. */
2162 if (SLP_TREE_RIGHT (slp_node))
2163 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
2164 else
2165 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2167 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
2169 if (SLP_TREE_RIGHT (slp_node))
2170 /* The defs are already vectorized. */
2171 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
2172 else
2173 /* Build vectors from scalar defs. */
2174 vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
2175 -1);
2179 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2180 building a vector of type MASK_TYPE from it) and two input vectors placed in
2181 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2182 shifting by STRIDE elements of DR_CHAIN for every copy.
2183 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2184 copies).
2185 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2186 the created stmts must be inserted. */
2188 static inline void
2189 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2190 tree mask, int first_vec_indx, int second_vec_indx,
2191 gimple_stmt_iterator *gsi, slp_tree node,
2192 tree builtin_decl, tree vectype,
2193 VEC(tree,heap) *dr_chain,
2194 int ncopies, int vect_stmts_counter)
2196 tree perm_dest;
2197 gimple perm_stmt = NULL;
2198 stmt_vec_info next_stmt_info;
2199 int i, stride;
2200 tree first_vec, second_vec, data_ref;
2202 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2204 /* Initialize the vect stmts of NODE to properly insert the generated
2205 stmts later. */
2206 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2207 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2208 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2210 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2211 for (i = 0; i < ncopies; i++)
2213 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2214 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2216 /* Generate the permute statement. */
2217 perm_stmt = gimple_build_call (builtin_decl,
2218 3, first_vec, second_vec, mask);
2219 data_ref = make_ssa_name (perm_dest, perm_stmt);
2220 gimple_call_set_lhs (perm_stmt, data_ref);
2221 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2223 /* Store the vector statement in NODE. */
2224 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2225 stride * i + vect_stmts_counter, perm_stmt);
2227 first_vec_indx += stride;
2228 second_vec_indx += stride;
2231 /* Mark the scalar stmt as vectorized. */
2232 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2233 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2237 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2238 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2239 representation. Check that the mask is valid and return FALSE if not.
2240 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2241 the next vector, i.e., the current first vector is not needed. */
2243 static bool
2244 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2245 int mask_nunits, bool only_one_vec, int index,
2246 int *mask, int *current_mask_element,
2247 bool *need_next_vector, int *number_of_mask_fixes,
2248 bool *mask_fixed, bool *needs_first_vector)
2250 int i;
2252 /* Convert to target specific representation. */
2253 *current_mask_element = first_mask_element + m;
2254 /* Adjust the value in case it's a mask for second and third vectors. */
2255 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2257 if (*current_mask_element < mask_nunits)
2258 *needs_first_vector = true;
2260 /* We have only one input vector to permute but the mask accesses values in
2261 the next vector as well. */
2262 if (only_one_vec && *current_mask_element >= mask_nunits)
2264 if (vect_print_dump_info (REPORT_DETAILS))
2266 fprintf (vect_dump, "permutation requires at least two vectors ");
2267 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2270 return false;
2273 /* The mask requires the next vector. */
2274 if (*current_mask_element >= mask_nunits * 2)
2276 if (*needs_first_vector || *mask_fixed)
2278 /* We either need the first vector too or have already moved to the
2279 next vector. In both cases, this permutation needs three
2280 vectors. */
2281 if (vect_print_dump_info (REPORT_DETAILS))
2283 fprintf (vect_dump, "permutation requires at "
2284 "least three vectors ");
2285 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2288 return false;
2291 /* We move to the next vector, dropping the first one and working with
2292 the second and the third - we need to adjust the values of the mask
2293 accordingly. */
2294 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2296 for (i = 0; i < index; i++)
2297 mask[i] -= mask_nunits * *number_of_mask_fixes;
2299 (*number_of_mask_fixes)++;
2300 *mask_fixed = true;
2303 *need_next_vector = *mask_fixed;
2305 /* This was the last element of this mask. Start a new one. */
2306 if (index == mask_nunits - 1)
2308 *number_of_mask_fixes = 1;
2309 *mask_fixed = false;
2310 *needs_first_vector = false;
2313 return true;
2317 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2318 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2319 permute statements for SLP_NODE_INSTANCE. */
2320 bool
2321 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2322 gimple_stmt_iterator *gsi, int vf,
2323 slp_instance slp_node_instance, bool analyze_only)
2325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2326 tree mask_element_type = NULL_TREE, mask_type;
2327 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2328 slp_tree node;
2329 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2330 gimple next_scalar_stmt;
2331 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2332 int first_mask_element;
2333 int index, unroll_factor, *mask, current_mask_element, ncopies;
2334 bool only_one_vec = false, need_next_vector = false;
2335 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2336 int number_of_mask_fixes = 1;
2337 bool mask_fixed = false;
2338 bool needs_first_vector = false;
2340 if (!targetm.vectorize.builtin_vec_perm)
2342 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump, "no builtin for vect permute for ");
2345 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2348 return false;
2351 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2352 &mask_element_type);
2353 if (!builtin_decl || !mask_element_type)
2355 if (vect_print_dump_info (REPORT_DETAILS))
2357 fprintf (vect_dump, "no builtin for vect permute for ");
2358 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2361 return false;
2364 mask_type = get_vectype_for_scalar_type (mask_element_type);
2365 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2366 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2367 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2368 scale = mask_nunits / nunits;
2369 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2371 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2372 unrolling factor. */
2373 orig_vec_stmts_num = group_size *
2374 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2375 if (orig_vec_stmts_num == 1)
2376 only_one_vec = true;
2378 /* Number of copies is determined by the final vectorization factor
2379 relatively to SLP_NODE_INSTANCE unrolling factor. */
2380 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2382 /* Generate permutation masks for every NODE. Number of masks for each NODE
2383 is equal to GROUP_SIZE.
2384 E.g., we have a group of three nodes with three loads from the same
2385 location in each node, and the vector size is 4. I.e., we have a
2386 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2387 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2388 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2391 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2392 scpecific type, e.g., in bytes for Altivec.
2393 The last mask is illegal since we assume two operands for permute
2394 operation, and the mask element values can't be outside that range.
2395 Hence, the last mask must be converted into {2,5,5,5}.
2396 For the first two permutations we need the first and the second input
2397 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2398 we need the second and the third vectors: {b1,c1,a2,b2} and
2399 {c2,a3,b3,c3}. */
2401 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2403 scalar_index = 0;
2404 index = 0;
2405 vect_stmts_counter = 0;
2406 vec_index = 0;
2407 first_vec_index = vec_index++;
2408 if (only_one_vec)
2409 second_vec_index = first_vec_index;
2410 else
2411 second_vec_index = vec_index++;
2413 for (j = 0; j < unroll_factor; j++)
2415 for (k = 0; k < group_size; k++)
2417 first_mask_element = (i + j * group_size) * scale;
2418 for (m = 0; m < scale; m++)
2420 if (!vect_get_mask_element (stmt, first_mask_element, m,
2421 mask_nunits, only_one_vec, index, mask,
2422 &current_mask_element, &need_next_vector,
2423 &number_of_mask_fixes, &mask_fixed,
2424 &needs_first_vector))
2425 return false;
2427 mask[index++] = current_mask_element;
2430 if (index == mask_nunits)
2432 tree mask_vec = NULL;
2434 while (--index >= 0)
2436 tree t = build_int_cst (mask_element_type, mask[index]);
2437 mask_vec = tree_cons (NULL, t, mask_vec);
2439 mask_vec = build_vector (mask_type, mask_vec);
2440 index = 0;
2442 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2443 mask_vec))
2445 if (vect_print_dump_info (REPORT_DETAILS))
2447 fprintf (vect_dump, "unsupported vect permute ");
2448 print_generic_expr (vect_dump, mask_vec, 0);
2450 free (mask);
2451 return false;
2454 if (!analyze_only)
2456 if (need_next_vector)
2458 first_vec_index = second_vec_index;
2459 second_vec_index = vec_index;
2462 next_scalar_stmt = VEC_index (gimple,
2463 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2465 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2466 mask_vec, first_vec_index, second_vec_index,
2467 gsi, node, builtin_decl, vectype, dr_chain,
2468 ncopies, vect_stmts_counter++);
2475 free (mask);
2476 return true;
2481 /* Vectorize SLP instance tree in postorder. */
2483 static bool
2484 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2485 unsigned int vectorization_factor)
2487 gimple stmt;
2488 bool strided_store, is_store;
2489 gimple_stmt_iterator si;
2490 stmt_vec_info stmt_info;
2491 unsigned int vec_stmts_size, nunits, group_size;
2492 tree vectype;
2493 int i;
2494 slp_tree loads_node;
2496 if (!node)
2497 return false;
2499 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2500 vectorization_factor);
2501 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2502 vectorization_factor);
2504 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2505 stmt_info = vinfo_for_stmt (stmt);
2507 /* VECTYPE is the type of the destination. */
2508 vectype = STMT_VINFO_VECTYPE (stmt_info);
2509 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2510 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2512 /* For each SLP instance calculate number of vector stmts to be created
2513 for the scalar stmts in each node of the SLP tree. Number of vector
2514 elements in one vector iteration is the number of scalar elements in
2515 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2516 size. */
2517 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2519 /* In case of load permutation we have to allocate vectorized statements for
2520 all the nodes that participate in that permutation. */
2521 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2523 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2525 if (!SLP_TREE_VEC_STMTS (loads_node))
2527 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2528 vec_stmts_size);
2529 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2534 if (!SLP_TREE_VEC_STMTS (node))
2536 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2537 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2540 if (vect_print_dump_info (REPORT_DETAILS))
2542 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2543 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2546 /* Loads should be inserted before the first load. */
2547 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2548 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2549 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2550 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2551 else if (is_pattern_stmt_p (stmt_info))
2552 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2553 else
2554 si = gsi_for_stmt (stmt);
2556 /* Stores should be inserted just before the last store. */
2557 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2558 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2560 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2561 si = gsi_for_stmt (last_store);
2564 /* Mark the first element of the reduction chain as reduction to properly
2565 transform the node. In the analysis phase only the last element of the
2566 chain is marked as reduction. */
2567 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2568 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2570 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2571 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2574 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2575 return is_store;
2579 /* Generate vector code for all SLP instances in the loop/basic block. */
2581 bool
2582 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2584 VEC (slp_instance, heap) *slp_instances;
2585 slp_instance instance;
2586 unsigned int i, vf;
2587 bool is_store = false;
2589 if (loop_vinfo)
2591 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2592 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2594 else
2596 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2597 vf = 1;
2600 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2602 /* Schedule the tree of INSTANCE. */
2603 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2604 instance, vf);
2605 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2606 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2607 fprintf (vect_dump, "vectorizing stmts using SLP.");
2610 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2612 slp_tree root = SLP_INSTANCE_TREE (instance);
2613 gimple store;
2614 unsigned int j;
2615 gimple_stmt_iterator gsi;
2617 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2618 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2620 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2621 break;
2623 /* Free the attached stmt_vec_info and remove the stmt. */
2624 gsi = gsi_for_stmt (store);
2625 gsi_remove (&gsi, true);
2626 free_stmt_vec_info (store);
2630 return is_store;
2634 /* Vectorize the basic block. */
2636 void
2637 vect_slp_transform_bb (basic_block bb)
2639 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2640 gimple_stmt_iterator si;
2642 gcc_assert (bb_vinfo);
2644 if (vect_print_dump_info (REPORT_DETAILS))
2645 fprintf (vect_dump, "SLPing BB\n");
2647 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2649 gimple stmt = gsi_stmt (si);
2650 stmt_vec_info stmt_info;
2652 if (vect_print_dump_info (REPORT_DETAILS))
2654 fprintf (vect_dump, "------>SLPing statement: ");
2655 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2658 stmt_info = vinfo_for_stmt (stmt);
2659 gcc_assert (stmt_info);
2661 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2662 if (STMT_SLP_TYPE (stmt_info))
2664 vect_schedule_slp (NULL, bb_vinfo);
2665 break;
2669 mark_sym_for_renaming (gimple_vop (cfun));
2670 /* The memory tags and pointers in vectorized statements need to
2671 have their SSA forms updated. FIXME, why can't this be delayed
2672 until all the loops have been transformed? */
2673 update_ssa (TODO_update_ssa);
2675 if (vect_print_dump_info (REPORT_DETAILS))
2676 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2678 destroy_bb_vec_info (bb_vinfo);