gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds, and UNKNOWN_LOC
   if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
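/* For example, for the group of statements

     x0 = a0 + b0;
     x1 = a1 + b1;

   the defs of A1 and B1 must have the same definition type (here two
   vect_internal_def defs) and types compatible with those of A0 and B0
   from the first statement; otherwise the SLP build fails.  */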
static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only.  */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                   dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group, check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  Allow different
                 definition types for reduction chains: the first stmt must be
                 a vect_reduction_def (a phi node), and the rest
                 vect_internal_def.  */
              if ((i == 0
                   && ((*first_stmt_dt0 != dt[i]
                        && !(*first_stmt_dt0 == vect_reduction_def
                             && dt[i] == vect_internal_def))
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && ((*first_stmt_dt1 != dt[i]
                           && !(*first_stmt_dt1 == vect_reduction_def
                                && dt[i] == vect_internal_def))
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
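/* For example, the pair

     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];

   is rejected here because the statements perform different operations
   (PLUS_EXPR vs. MINUS_EXPR), while two '+' statements with isomorphic
   operands build successfully.  */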
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }

                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }

                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of the interleaved loads group is not
                 greater than the SLP group size.  */
              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check complex-number chains here, so we keep them in
             LOADS for further checking in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
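  /* E.g., if the instance has two load nodes and the first scalar stmt of
     the first node reads element 1 of the chain while that of the second
     node reads element 0, the nodes end up in TMP_LOADS[1] and TMP_LOADS[0]
     respectively, i.e. sorted by their position in the chain.  */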
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
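/* For example, for the strided stores

     a[0] = x[1];
     a[1] = x[0];

   the loads appear at positions 1 and 0 of their interleaving chain, so
   LOAD_PERMUTATION is {1, 0}.  */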
static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as follows:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
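  /* E.g., for GROUP_SIZE 4 the permutation

       0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3

     is of the supported form: each sequence of GROUP_SIZE identical indices
     corresponds to one load node, and every index appears exactly once.
     A permutation such as 0 1 2 3 0 1 2 3 ... is rejected below.  */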
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}
/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
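  /* E.g., with NUNITS 4 and GROUP_SIZE 6, least_common_multiple (4, 6) is
     12, so the unrolling factor is 12 / 6 = 2: two copies of the group are
     needed to fill a whole number of vectors.  */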
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
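  /* E.g., with GROUP_SIZE 2 and NUNITS 4, the unrolling factor is 2 and
     ncopies_for_cost = 2 * 2 / 4 = 1: one vector stmt covers both unrolled
     copies of the group.  */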
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                     " block SLP");
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if a reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}
/* Check if loads and stores are mixed in the basic block (in that
   case, if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE if there is a statement marked as not
   vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow unanalyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
    }
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
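/* For example, an instance built with unrolling factor 2 in a loop whose
   final VF is 4 has its inside cost multiplied by 4 / 2 = 2.  */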
void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */
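/* For example, for the group

     a[0] = b[0] + 2;
     a[1] = b[1] + 3;

   with OP_NUM 1 and NUNITS 4, the constant operands are packed into the
   vector {2, 3, 2, 3} (see the explanation of NUMBER_OF_COPIES below).  */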
1915 static void
1916 vect_get_constant_vectors (tree op, slp_tree slp_node,
1917 VEC (tree, heap) **vec_oprnds,
1918 unsigned int op_num, unsigned int number_of_vectors,
1919 int reduc_index)
1921 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1922 gimple stmt = VEC_index (gimple, stmts, 0);
1923 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1924 int nunits;
1925 tree vec_cst;
1926 tree t = NULL_TREE;
1927 int j, number_of_places_left_in_vector;
1928 tree vector_type;
1929 tree vop;
1930 int group_size = VEC_length (gimple, stmts);
1931 unsigned int vec_num, i;
1932 int number_of_copies = 1;
1933 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1934 bool constant_p, is_store;
1935 tree neutral_op = NULL;
1936 enum tree_code code = gimple_assign_rhs_code (stmt);
1937 gimple def_stmt;
1938 struct loop *loop;
1940 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1941 && reduc_index != -1)
1943 op_num = reduc_index - 1;
1944 op = gimple_op (stmt, reduc_index);
1945 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1946 we need either neutral operands or the original operands. See
1947 get_initial_def_for_reduction() for details. */
1948 switch (code)
1950 case WIDEN_SUM_EXPR:
1951 case DOT_PROD_EXPR:
1952 case PLUS_EXPR:
1953 case MINUS_EXPR:
1954 case BIT_IOR_EXPR:
1955 case BIT_XOR_EXPR:
1956 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1957 neutral_op = build_real (TREE_TYPE (op), dconst0);
1958 else
1959 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1961 break;
1963 case MULT_EXPR:
1964 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1965 neutral_op = build_real (TREE_TYPE (op), dconst1);
1966 else
1967 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1969 break;
1971 case BIT_AND_EXPR:
1972 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1973 break;
1975 case MAX_EXPR:
1976 case MIN_EXPR:
1977 def_stmt = SSA_NAME_DEF_STMT (op);
1978 loop = (gimple_bb (stmt))->loop_father;
1979 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1980 loop_preheader_edge (loop));
1981 break;
1983 default:
1984 neutral_op = NULL;
1988 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1990 is_store = true;
1991 op = gimple_assign_rhs1 (stmt);
1993 else
1994 is_store = false;
1996 gcc_assert (op);
1998 if (CONSTANT_CLASS_P (op))
1999 constant_p = true;
2000 else
2001 constant_p = false;
2003 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2004 gcc_assert (vector_type);
2005 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2007 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2008 created vectors. It is greater than 1 if unrolling is performed.
2010 For example, we have two scalar operands, s1 and s2 (e.g., group of
2011 strided accesses of size two), while NUNITS is four (i.e., four scalars
2012 of this type can be packed in a vector). The output vector will contain
2013 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2014 will be 2).
2016 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2017 containing the operands.
2019 For example, NUNITS is four as before, and the group size is 8
2020 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2021 {s5, s6, s7, s8}. */
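/* Worked instances of the formula below (illustrative): with NUNITS == 4 and
   GROUP_SIZE == 2, least_common_multiple (4, 2) / 2 == 2, matching the
   {s1, s2, s1, s2} example above; with GROUP_SIZE == 8,
   least_common_multiple (4, 8) / 8 == 1, i.e., each scalar is used once. */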
2023 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2025 number_of_places_left_in_vector = nunits;
2026 for (j = 0; j < number_of_copies; j++)
2028 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2030 if (is_store)
2031 op = gimple_assign_rhs1 (stmt);
2032 else
2033 op = gimple_op (stmt, op_num + 1);
2035 if (reduc_index != -1)
2037 loop = (gimple_bb (stmt))->loop_father;
2038 def_stmt = SSA_NAME_DEF_STMT (op);
2040 gcc_assert (loop);
2042 /* Get the def before the loop. In a reduction chain we have only
2043 one initial value. */
2044 if ((j != (number_of_copies - 1)
2045 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2046 && i != 0))
2047 && neutral_op)
2048 op = neutral_op;
2049 else
2050 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2051 loop_preheader_edge (loop));
2054 /* Create 'vect_ = {op0,op1,...,opn}'. */
2055 t = tree_cons (NULL_TREE, op, t);
2057 number_of_places_left_in_vector--;
2059 if (number_of_places_left_in_vector == 0)
2061 number_of_places_left_in_vector = nunits;
2063 if (constant_p)
2064 vec_cst = build_vector (vector_type, t);
2065 else
2066 vec_cst = build_constructor_from_list (vector_type, t);
2067 VEC_quick_push (tree, voprnds,
2068 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2069 t = NULL_TREE;
2074 /* The vectors were created in reverse order, so put them back into the
2075 original order. */
2076 vec_num = VEC_length (tree, voprnds);
2077 for (j = vec_num - 1; j >= 0; j--)
2079 vop = VEC_index (tree, voprnds, j);
2080 VEC_quick_push (tree, *vec_oprnds, vop);
2083 VEC_free (tree, heap, voprnds);
2085 /* If VF is greater than the unrolling factor needed for the SLP group of
2086 stmts, NUMBER_OF_VECTORS to be created is greater than
2087 NUMBER_OF_SCALARS/NUNITS (or NUNITS/NUMBER_OF_SCALARS), and hence the
2088 vectors built above have to be replicated. */
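/* E.g. (illustrative): if the loop above built the two vectors
   {s1,s2,s3,s4} {s5,s6,s7,s8} but NUMBER_OF_VECTORS is 4 and no neutral
   operand exists, the loop below pushes the existing vectors again, giving
   {s1,s2,s3,s4} {s5,s6,s7,s8} {s1,s2,s3,s4} {s5,s6,s7,s8}. */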
2089 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2093 if (neutral_op)
2095 if (!neutral_vec)
2096 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2098 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2100 else
2102 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2103 VEC_quick_push (tree, *vec_oprnds, vop);
2109 /* Get vectorized definitions from SLP_NODE that contains corresponding
2110 vectorized def-stmts. */
2112 static void
2113 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2115 tree vec_oprnd;
2116 gimple vec_def_stmt;
2117 unsigned int i;
2119 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2121 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2123 gcc_assert (vec_def_stmt);
2124 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2125 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2130 /* Get vectorized definitions for SLP_NODE.
2131 If the scalar definitions are loop invariants or constants, collect them and
2132 call vect_get_constant_vectors() to create vector stmts.
2133 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2134 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
2135 vect_get_slp_vect_defs() to retrieve them.
2136 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
2137 the right node). This is used when the second operand must remain scalar. */
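/* A sketch of a typical call (with hypothetical operand names): for a group
   of scalar stmts of the form a = b + c one might write

     VEC (tree, heap) *vb = NULL, *vc = NULL;
     vect_get_slp_defs (op_b, op_c, node, &vb, &vc, -1);

   after which VB and VC hold the vector defs for the two operands; both
   vectors are allocated by this function. */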
2139 void
2140 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2141 VEC (tree,heap) **vec_oprnds0,
2142 VEC (tree,heap) **vec_oprnds1, int reduc_index)
2144 gimple first_stmt;
2145 enum tree_code code;
2146 int number_of_vects;
2147 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2149 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2150 /* The number of vector defs is determined by the number of vector statements
2151 in the node from which we get those statements. */
2152 if (SLP_TREE_LEFT (slp_node))
2153 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
2154 else
2156 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2157 /* The number of vector stmts was calculated according to the LHS in
2158 vect_schedule_slp_instance (); fix it by replacing LHS with RHS, if
2159 necessary. See vect_get_smallest_scalar_type () for details. */
2160 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2161 &rhs_size_unit);
2162 if (rhs_size_unit != lhs_size_unit)
2164 number_of_vects *= rhs_size_unit;
2165 number_of_vects /= lhs_size_unit;
2169 /* Allocate memory for vectorized defs. */
2170 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
2172 /* SLP_NODE corresponds either to a group of stores or to a group of
2173 unary/binary operations. We don't call this function for loads.
2174 For reduction defs we call vect_get_constant_vectors(), since we are
2175 looking for initial loop invariant values. */
2176 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
2177 /* The defs are already vectorized. */
2178 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
2179 else
2180 /* Build vectors from scalar defs. */
2181 vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
2182 reduc_index);
2184 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
2185 /* Since we don't call this function with loads, this is a group of
2186 stores. */
2187 return;
2189 /* For reductions, we only need initial values. */
2190 if (reduc_index != -1)
2191 return;
2193 code = gimple_assign_rhs_code (first_stmt);
2194 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
2195 return;
2197 /* The number of vector defs is determined by the number of vector statements
2198 in the node from which we get those statements. */
2199 if (SLP_TREE_RIGHT (slp_node))
2200 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
2201 else
2202 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2204 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
2206 if (SLP_TREE_RIGHT (slp_node))
2207 /* The defs are already vectorized. */
2208 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
2209 else
2210 /* Build vectors from scalar defs. */
2211 vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
2212 -1);
2216 /* Create NCOPIES permutation statements using the (already built) vector
2217 mask MASK and two input vectors placed in
2218 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2219 shifting by STRIDE elements of DR_CHAIN for every copy.
2220 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2221 copies).
2222 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2223 the created stmts must be inserted. */
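/* Illustrative example: with SLP_TREE_NUMBER_OF_VEC_STMTS == 8 and
   NCOPIES == 2, STRIDE == 4, so copy 0 permutes the vectors at
   FIRST_VEC_INDX and SECOND_VEC_INDX in DR_CHAIN and copy 1 the vectors
   four positions further down. */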
2225 static inline void
2226 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2227 tree mask, int first_vec_indx, int second_vec_indx,
2228 gimple_stmt_iterator *gsi, slp_tree node,
2229 tree builtin_decl, tree vectype,
2230 VEC(tree,heap) *dr_chain,
2231 int ncopies, int vect_stmts_counter)
2233 tree perm_dest;
2234 gimple perm_stmt = NULL;
2235 stmt_vec_info next_stmt_info;
2236 int i, stride;
2237 tree first_vec, second_vec, data_ref;
2239 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2241 /* Initialize the vect stmts of NODE to properly insert the generated
2242 stmts later. */
2243 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2244 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2245 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2247 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2248 for (i = 0; i < ncopies; i++)
2250 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2251 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2253 /* Generate the permute statement. */
2254 perm_stmt = gimple_build_call (builtin_decl,
2255 3, first_vec, second_vec, mask);
2256 data_ref = make_ssa_name (perm_dest, perm_stmt);
2257 gimple_call_set_lhs (perm_stmt, data_ref);
2258 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2260 /* Store the vector statement in NODE. */
2261 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2262 stride * i + vect_stmts_counter, perm_stmt);
2264 first_vec_indx += stride;
2265 second_vec_indx += stride;
2268 /* Mark the scalar stmt as vectorized. */
2269 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2270 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2274 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2275 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2276 representation. Check that the mask is valid and return FALSE if not.
2277 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2278 the next vector, i.e., the current first vector is not needed. */
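/* For example (illustrative): with MASK_NUNITS == 4, a computed element
   value of 5 selects element 1 of the second input vector, while a value of
   8 or more refers to a third vector and is rejected below unless the mask
   can be shifted to the next pair of vectors. */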
2280 static bool
2281 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2282 int mask_nunits, bool only_one_vec, int index,
2283 int *mask, int *current_mask_element,
2284 bool *need_next_vector, int *number_of_mask_fixes,
2285 bool *mask_fixed, bool *needs_first_vector)
2287 int i;
2289 /* Convert to target specific representation. */
2290 *current_mask_element = first_mask_element + m;
2291 /* Adjust the value in case it's a mask for the second and third vectors. */
2292 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2294 if (*current_mask_element < mask_nunits)
2295 *needs_first_vector = true;
2297 /* We have only one input vector to permute but the mask accesses values in
2298 the next vector as well. */
2299 if (only_one_vec && *current_mask_element >= mask_nunits)
2301 if (vect_print_dump_info (REPORT_DETAILS))
2303 fprintf (vect_dump, "permutation requires at least two vectors ");
2304 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2307 return false;
2310 /* The mask requires the next vector. */
2311 if (*current_mask_element >= mask_nunits * 2)
2313 if (*needs_first_vector || *mask_fixed)
2315 /* We either need the first vector too or have already moved to the
2316 next vector. In both cases, this permutation needs three
2317 vectors. */
2318 if (vect_print_dump_info (REPORT_DETAILS))
2320 fprintf (vect_dump, "permutation requires at "
2321 "least three vectors ");
2322 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2325 return false;
2328 /* We move to the next vector, dropping the first one and working with
2329 the second and the third - we need to adjust the values of the mask
2330 accordingly. */
2331 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2333 for (i = 0; i < index; i++)
2334 mask[i] -= mask_nunits * *number_of_mask_fixes;
2336 (*number_of_mask_fixes)++;
2337 *mask_fixed = true;
2340 *need_next_vector = *mask_fixed;
2342 /* This was the last element of this mask. Start a new one. */
2343 if (index == mask_nunits - 1)
2345 *number_of_mask_fixes = 1;
2346 *mask_fixed = false;
2347 *needs_first_vector = false;
2350 return true;
2354 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2355 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2356 permute statements for SLP_NODE_INSTANCE. */
2357 bool
2358 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2359 gimple_stmt_iterator *gsi, int vf,
2360 slp_instance slp_node_instance, bool analyze_only)
2362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2363 tree mask_element_type = NULL_TREE, mask_type;
2364 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2365 slp_tree node;
2366 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2367 gimple next_scalar_stmt;
2368 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2369 int first_mask_element;
2370 int index, unroll_factor, *mask, current_mask_element, ncopies;
2371 bool only_one_vec = false, need_next_vector = false;
2372 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2373 int number_of_mask_fixes = 1;
2374 bool mask_fixed = false;
2375 bool needs_first_vector = false;
2377 if (!targetm.vectorize.builtin_vec_perm)
2379 if (vect_print_dump_info (REPORT_DETAILS))
2381 fprintf (vect_dump, "no builtin for vect permute for ");
2382 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2385 return false;
2388 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2389 &mask_element_type);
2390 if (!builtin_decl || !mask_element_type)
2392 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "no builtin for vect permute for ");
2395 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2398 return false;
2401 mask_type = get_vectype_for_scalar_type (mask_element_type);
2402 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2403 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2404 nunits = TYPE_VECTOR_SUBPARTS (vectype);
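/* Illustrative note (assuming an AltiVec-style byte permute, as in the
   example below): for V4SI vectors driven by a 16-byte mask we get
   MASK_NUNITS == 16, NUNITS == 4 and hence SCALE == 4 mask elements per
   vector element. */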
2405 scale = mask_nunits / nunits;
2406 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2408 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2409 unrolling factor. */
2410 orig_vec_stmts_num = group_size
2411 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2412 if (orig_vec_stmts_num == 1)
2413 only_one_vec = true;
2415 /* Number of copies is determined by the final vectorization factor
2416 relative to the SLP_NODE_INSTANCE unrolling factor. */
2417 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2419 /* Generate permutation masks for every NODE. Number of masks for each NODE
2420 is equal to GROUP_SIZE.
2421 E.g., we have a group of three nodes with three loads from the same
2422 location in each node, and the vector size is 4. I.e., we have an
2423 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2424 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2425 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2428 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2429 specific type, e.g., in bytes for Altivec).
2430 The last mask is illegal since we assume two operands for the permute
2431 operation, and the mask element values can't be outside that range.
2432 Hence, the last mask must be converted into {2,5,5,5}.
2433 For the first two permutations we need the first and the second input
2434 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2435 we need the second and the third vectors: {b1,c1,a2,b2} and
2436 {c2,a3,b3,c3}. */
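/* In the loop below, FIRST_MASK_ELEMENT == (i + j * group_size) * scale
   reproduces the masks above: for the a's node (i == 0) with
   GROUP_SIZE == 3 and SCALE == 1, j == 0 contributes 0,0,0 and j == 1
   contributes 3,3,3; grouped into vectors of four this yields
   {0,0,0,3} {3,3,6,6} {6,9,9,9}, with the last mask then remapped as
   described. */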
2438 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2440 scalar_index = 0;
2441 index = 0;
2442 vect_stmts_counter = 0;
2443 vec_index = 0;
2444 first_vec_index = vec_index++;
2445 if (only_one_vec)
2446 second_vec_index = first_vec_index;
2447 else
2448 second_vec_index = vec_index++;
2450 for (j = 0; j < unroll_factor; j++)
2452 for (k = 0; k < group_size; k++)
2454 first_mask_element = (i + j * group_size) * scale;
2455 for (m = 0; m < scale; m++)
2457 if (!vect_get_mask_element (stmt, first_mask_element, m,
2458 mask_nunits, only_one_vec, index, mask,
2459 &current_mask_element, &need_next_vector,
2460 &number_of_mask_fixes, &mask_fixed,
2461 &needs_first_vector))
2462 { free (mask); return false; }  /* Don't leak MASK on early exit. */
2464 mask[index++] = current_mask_element;
2467 if (index == mask_nunits)
2469 tree mask_vec = NULL;
2471 while (--index >= 0)
2473 tree t = build_int_cst (mask_element_type, mask[index]);
2474 mask_vec = tree_cons (NULL, t, mask_vec);
2476 mask_vec = build_vector (mask_type, mask_vec);
2477 index = 0;
2479 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2480 mask_vec))
2482 if (vect_print_dump_info (REPORT_DETAILS))
2484 fprintf (vect_dump, "unsupported vect permute ");
2485 print_generic_expr (vect_dump, mask_vec, 0);
2487 free (mask);
2488 return false;
2491 if (!analyze_only)
2493 if (need_next_vector)
2495 first_vec_index = second_vec_index;
2496 second_vec_index = vec_index;
2499 next_scalar_stmt = VEC_index (gimple,
2500 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2502 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2503 mask_vec, first_vec_index, second_vec_index,
2504 gsi, node, builtin_decl, vectype, dr_chain,
2505 ncopies, vect_stmts_counter++);
2512 free (mask);
2513 return true;
2518 /* Vectorize SLP instance tree in postorder. */
2520 static bool
2521 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2522 unsigned int vectorization_factor)
2524 gimple stmt;
2525 bool strided_store, is_store;
2526 gimple_stmt_iterator si;
2527 stmt_vec_info stmt_info;
2528 unsigned int vec_stmts_size, nunits, group_size;
2529 tree vectype;
2530 int i;
2531 slp_tree loads_node;
2533 if (!node)
2534 return false;
2536 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2537 vectorization_factor);
2538 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2539 vectorization_factor);
2541 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2542 stmt_info = vinfo_for_stmt (stmt);
2544 /* VECTYPE is the type of the destination. */
2545 vectype = STMT_VINFO_VECTYPE (stmt_info);
2546 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2547 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2549 /* For each SLP instance calculate number of vector stmts to be created
2550 for the scalar stmts in each node of the SLP tree. The number of vector
2551 stmts per node equals the number of scalar elements in one scalar
2552 iteration (GROUP_SIZE) multiplied by VF and divided by the number of
2553 elements in a vector (NUNITS). */
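/* E.g. (illustrative): with VF == 4, GROUP_SIZE == 2 and NUNITS == 4,
   (4 * 2) / 4 == 2 vector stmts are created for each node of the
   instance. */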
2554 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2556 /* In case of load permutation we have to allocate vectorized statements for
2557 all the nodes that participate in that permutation. */
2558 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2560 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2562 if (!SLP_TREE_VEC_STMTS (loads_node))
2564 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2565 vec_stmts_size);
2566 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2571 if (!SLP_TREE_VEC_STMTS (node))
2573 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2574 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2577 if (vect_print_dump_info (REPORT_DETAILS))
2579 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2580 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2583 /* Loads should be inserted before the first load. */
2584 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2585 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2586 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2587 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2588 else if (is_pattern_stmt_p (stmt_info))
2589 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2590 else
2591 si = gsi_for_stmt (stmt);
2593 /* Stores should be inserted just before the last store. */
2594 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2595 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2597 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2598 si = gsi_for_stmt (last_store);
2601 /* Mark the first element of the reduction chain as reduction to properly
2602 transform the node. In the analysis phase only the last element of the
2603 chain is marked as reduction. */
2604 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2605 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2607 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2608 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2611 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2612 return is_store;
2616 /* Generate vector code for all SLP instances in the loop/basic block. */
2618 bool
2619 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2621 VEC (slp_instance, heap) *slp_instances;
2622 slp_instance instance;
2623 unsigned int i, vf;
2624 bool is_store = false;
2626 if (loop_vinfo)
2628 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2629 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2631 else
2633 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2634 vf = 1;
2637 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2639 /* Schedule the tree of INSTANCE. */
2640 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2641 instance, vf);
2642 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2643 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2644 fprintf (vect_dump, "vectorizing stmts using SLP.");
2647 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2649 slp_tree root = SLP_INSTANCE_TREE (instance);
2650 gimple store;
2651 unsigned int j;
2652 gimple_stmt_iterator gsi;
2654 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2655 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2657 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2658 break;
2660 /* Free the attached stmt_vec_info and remove the stmt. */
2661 gsi = gsi_for_stmt (store);
2662 gsi_remove (&gsi, true);
2663 free_stmt_vec_info (store);
2667 return is_store;
2671 /* Vectorize the basic block. */
2673 void
2674 vect_slp_transform_bb (basic_block bb)
2676 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2677 gimple_stmt_iterator si;
2679 gcc_assert (bb_vinfo);
2681 if (vect_print_dump_info (REPORT_DETAILS))
2682 fprintf (vect_dump, "SLPing BB\n");
2684 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2686 gimple stmt = gsi_stmt (si);
2687 stmt_vec_info stmt_info;
2689 if (vect_print_dump_info (REPORT_DETAILS))
2691 fprintf (vect_dump, "------>SLPing statement: ");
2692 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2695 stmt_info = vinfo_for_stmt (stmt);
2696 gcc_assert (stmt_info);
2698 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2699 if (STMT_SLP_TYPE (stmt_info))
2701 vect_schedule_slp (NULL, bb_vinfo);
2702 break;
2706 mark_sym_for_renaming (gimple_vop (cfun));
2707 /* The memory tags and pointers in vectorized statements need to
2708 have their SSA forms updated. FIXME, why can't this be delayed
2709 until all the loops have been transformed? */
2710 update_ssa (TODO_update_ssa);
2712 if (vect_print_dump_info (REPORT_DETAILS))
2713 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2715 destroy_bb_vec_info (bb_vinfo);