2009-05-29 Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>
[official-gcc.git] / gcc / tree-vect-slp.c
blob6c932109baf960b32eccdfef128c78205a57b1b6
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
44 LOC
45 find_bb_location (basic_block bb)
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
50 if (!bb)
51 return UNKNOWN_LOC;
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
60 return UNKNOWN_LOC;
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
66 static void
67 vect_free_slp_tree (slp_tree node)
69 if (!node)
70 return;
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
83 free (node);
87 /* Free the memory allocated for the SLP instance. */
89 void
90 vect_free_slp_instance (slp_instance instance)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */

/* Returns true when every RHS operand of STMT is compatible with the
   corresponding operand of the group's first stmt; otherwise dumps a
   reason (under REPORT_SLP / REPORT_DETAILS) and returns false.
   Internal defs are pushed onto DEF_STMTS0/DEF_STMTS1, and cost modelling
   is performed only while processing the group's first stmt.  */
102 static bool
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
115 tree oprnd;
116 unsigned int i, number_of_oprnds;
117 tree def;
118 gimple def_stmt;
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
125 if (loop_vinfo)
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
/* Only the RHS operands (gimple positions 1 .. num_ops-1) are examined;
   dt[] has room for two of them, matching the unary/binary assigns and
   stores this function is called for.  */
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
136 &dt[i])
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
145 return false;
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
150 pattern. */
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* The '!' tests on *first_stmt_dt0/*first_stmt_dt1 below treat
   "not yet set" as false; the caller initializes both to
   vect_uninitialized_def, so this assumes that enumerator is zero.  */
156 if (!*first_stmt_dt0)
157 *pattern0 = true;
158 else
160 if (i == 1 && !*first_stmt_dt1)
161 *pattern1 = true;
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
171 return false;
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
/* NOTE(review): this tests dt[0] (*dt) even when i == 1 — confirm
   whether dt[i] was intended here.  */
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
182 return false;
185 switch (gimple_code (def_stmt))
187 case GIMPLE_PHI:
188 def = gimple_phi_result (def_stmt);
189 break;
191 case GIMPLE_ASSIGN:
192 def = gimple_assign_lhs (def_stmt);
193 break;
195 default:
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
198 return false;
/* First stmt of the group: record operand 0's def type/def type tree
   (or the constant operand) and model the stmt's cost once.  */
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
206 if (def)
207 *first_stmt_def0_type = TREE_TYPE (def);
208 else
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215 else
216 /* Store. */
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
220 else
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
226 if (def)
227 *first_stmt_def1_type = TREE_TYPE (def);
228 else
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
236 "oprnds in stmt");
237 return false;
239 *first_stmt_const_oprnd = oprnd;
242 else
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
246 if ((i == 0
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && *first_stmt_def0_type != TREE_TYPE (def))))
250 || (i == 1
251 && (*first_stmt_dt1 != dt[i]
252 || (*first_stmt_def1_type && def
253 && *first_stmt_def1_type != TREE_TYPE (def))))
254 || (!def
255 && TREE_TYPE (*first_stmt_const_oprnd)
256 != TREE_TYPE (oprnd)))
258 if (vect_print_dump_info (REPORT_SLP))
259 fprintf (vect_dump, "Build SLP failed: different types ");
261 return false;
266 /* Check the types of the definitions. */
267 switch (dt[i])
269 case vect_constant_def:
270 case vect_external_def:
271 break;
273 case vect_internal_def:
274 if (i == 0)
275 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
276 else
277 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
278 break;
280 default:
281 /* FORNOW: Not supported. */
282 if (vect_print_dump_info (REPORT_SLP))
284 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
285 print_generic_expr (vect_dump, def, TDF_SLIM);
288 return false;
292 return true;
296 /* Recursively build an SLP tree starting from NODE.
297 Fail (and return FALSE) if def-stmts are not isomorphic, require data
298 permutation or are of unsupported types of operation. Otherwise, return
299 TRUE. */

/* NODE holds the group's scalar stmts on entry.  On success this function
   attaches recursively-built left/right children for internal-def
   operands, accumulates INSIDE_COST/OUTSIDE_COST, appends each load's
   position in its interleaving chain to LOAD_PERMUTATION, collects load
   nodes that need permutation in LOADS, and raises *MAX_NUNITS when a
   smaller scalar type (larger nunits) is seen.  */
301 static bool
302 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
303 slp_tree *node, unsigned int group_size,
304 int *inside_cost, int *outside_cost,
305 int ncopies_for_cost, unsigned int *max_nunits,
306 VEC (int, heap) **load_permutation,
307 VEC (slp_tree, heap) **loads,
308 unsigned int vectorization_factor)
310 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
311 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
312 unsigned int i;
313 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
314 gimple stmt = VEC_index (gimple, stmts, 0);
315 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
316 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
317 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
318 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
319 tree lhs;
320 bool stop_recursion = false, need_same_oprnds = false;
321 tree vectype, scalar_type, first_op1 = NULL_TREE;
322 unsigned int ncopies;
323 optab optab;
324 int icode;
325 enum machine_mode optab_op2_mode;
326 enum machine_mode vec_mode;
327 tree first_stmt_const_oprnd = NULL_TREE;
328 struct data_reference *first_dr;
329 bool pattern0 = false, pattern1 = false;
330 HOST_WIDE_INT dummy;
331 bool permutation = false;
332 unsigned int load_place;
333 gimple first_load;
335 /* For every stmt in NODE find its def stmt/s. */
336 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
338 if (vect_print_dump_info (REPORT_SLP))
340 fprintf (vect_dump, "Build SLP for ");
341 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
344 lhs = gimple_get_lhs (stmt);
345 if (lhs == NULL_TREE)
347 if (vect_print_dump_info (REPORT_SLP))
349 fprintf (vect_dump,
350 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
351 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
354 return false;
357 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
358 vectype = get_vectype_for_scalar_type (scalar_type);
359 if (!vectype)
361 if (vect_print_dump_info (REPORT_SLP))
363 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
364 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
366 return false;
/* NCOPIES is per-stmt: the vectype (and hence subparts) is derived from
   this stmt's smallest scalar type, not the group's.  */
369 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
370 if (ncopies != 1)
372 if (vect_print_dump_info (REPORT_SLP))
373 fprintf (vect_dump, "SLP with multiple types ");
375 /* FORNOW: multiple types are unsupported in BB SLP. */
376 if (bb_vinfo)
377 return false;
380 /* In case of multiple types we need to detect the smallest type. */
381 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
382 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
384 if (is_gimple_call (stmt))
385 rhs_code = CALL_EXPR;
386 else
387 rhs_code = gimple_assign_rhs_code (stmt);
389 /* Check the operation. */
390 if (i == 0)
392 first_stmt_code = rhs_code;
394 /* Shift arguments should be equal in all the packed stmts for a
395 vector shift with scalar shift operand. */
396 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
397 || rhs_code == LROTATE_EXPR
398 || rhs_code == RROTATE_EXPR)
400 vec_mode = TYPE_MODE (vectype);
402 /* First see if we have a vector/vector shift. */
403 optab = optab_for_tree_code (rhs_code, vectype,
404 optab_vector);
406 if (!optab
407 || (optab->handlers[(int) vec_mode].insn_code
408 == CODE_FOR_nothing))
410 /* No vector/vector shift, try for a vector/scalar shift. */
411 optab = optab_for_tree_code (rhs_code, vectype,
412 optab_scalar);
414 if (!optab)
416 if (vect_print_dump_info (REPORT_SLP))
417 fprintf (vect_dump, "Build SLP failed: no optab.");
418 return false;
420 icode = (int) optab->handlers[(int) vec_mode].insn_code;
421 if (icode == CODE_FOR_nothing)
423 if (vect_print_dump_info (REPORT_SLP))
424 fprintf (vect_dump, "Build SLP failed: "
425 "op not supported by target.");
426 return false;
428 optab_op2_mode = insn_data[icode].operand[2].mode;
429 if (!VECTOR_MODE_P (optab_op2_mode))
/* Vector/scalar shift: all stmts in the group must share the same
   (scalar) shift amount, checked below via NEED_SAME_OPRNDS.  */
431 need_same_oprnds = true;
432 first_op1 = gimple_assign_rhs2 (stmt);
437 else
/* Subsequent stmts must perform the same operation as the first
   (REALPART/IMAGPART are treated as interchangeable).  */
439 if (first_stmt_code != rhs_code
440 && (first_stmt_code != IMAGPART_EXPR
441 || rhs_code != REALPART_EXPR)
442 && (first_stmt_code != REALPART_EXPR
443 || rhs_code != IMAGPART_EXPR))
445 if (vect_print_dump_info (REPORT_SLP))
447 fprintf (vect_dump,
448 "Build SLP failed: different operation in stmt ");
449 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
452 return false;
455 if (need_same_oprnds
456 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
458 if (vect_print_dump_info (REPORT_SLP))
460 fprintf (vect_dump,
461 "Build SLP failed: different shift arguments in ");
462 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
465 return false;
469 /* Strided store or load. */
470 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
472 if (REFERENCE_CLASS_P (lhs))
474 /* Store. */
475 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
476 stmt, &def_stmts0, &def_stmts1,
477 &first_stmt_dt0,
478 &first_stmt_dt1,
479 &first_stmt_def0_type,
480 &first_stmt_def1_type,
481 &first_stmt_const_oprnd,
482 ncopies_for_cost,
483 &pattern0, &pattern1))
484 return false;
486 else
488 /* Load. */
489 /* FORNOW: Check that there is no gap between the loads. */
490 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
491 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
492 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
495 if (vect_print_dump_info (REPORT_SLP))
497 fprintf (vect_dump, "Build SLP failed: strided "
498 "loads have gaps ");
499 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
502 return false;
505 /* Check that the size of interleaved loads group is not
506 greater than the SLP group size. */
507 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
508 > ncopies * group_size)
510 if (vect_print_dump_info (REPORT_SLP))
512 fprintf (vect_dump, "Build SLP failed: the number of "
513 "interleaved loads is greater than"
514 " the SLP group size ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
518 return false;
521 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
/* Alignment and cost are checked/modelled once, when the first
   load of the interleaving chain is encountered.  */
523 if (first_load == stmt)
525 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
526 if (vect_supportable_dr_alignment (first_dr)
527 == dr_unaligned_unsupported)
529 if (vect_print_dump_info (REPORT_SLP))
531 fprintf (vect_dump, "Build SLP failed: unsupported "
532 "unaligned load ");
533 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
536 return false;
539 /* Analyze costs (for the first stmt in the group). */
540 vect_model_load_cost (vinfo_for_stmt (stmt),
541 ncopies_for_cost, *node);
544 /* Store the place of this load in the interleaving chain. In
545 case that permutation is needed we later decide if a specific
546 permutation is supported. */
547 load_place = vect_get_place_in_interleaving_chain (stmt,
548 first_load);
549 if (load_place != i)
550 permutation = true;
552 VEC_safe_push (int, heap, *load_permutation, load_place);
554 /* We stop the tree when we reach a group of loads. */
555 stop_recursion = true;
556 continue;
558 } /* Strided access. */
559 else
561 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
563 /* Not strided load. */
564 if (vect_print_dump_info (REPORT_SLP))
566 fprintf (vect_dump, "Build SLP failed: not strided load ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
570 /* FORNOW: Not strided loads are not supported. */
571 return false;
574 /* Not memory operation. */
575 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
576 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
578 if (vect_print_dump_info (REPORT_SLP))
580 fprintf (vect_dump, "Build SLP failed: operation");
581 fprintf (vect_dump, " unsupported ");
582 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
585 return false;
588 /* Find the def-stmts. */
589 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
590 &def_stmts0, &def_stmts1,
591 &first_stmt_dt0, &first_stmt_dt1,
592 &first_stmt_def0_type,
593 &first_stmt_def1_type,
594 &first_stmt_const_oprnd,
595 ncopies_for_cost,
596 &pattern0, &pattern1))
597 return false;
601 /* Add the costs of the node to the overall instance costs. */
602 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
603 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
605 /* Strided loads were reached - stop the recursion. */
606 if (stop_recursion)
/* Only load nodes that actually need a permutation are recorded in
   LOADS; they also pay a per-group permute cost here.  */
608 if (permutation)
610 VEC_safe_push (slp_tree, heap, *loads, *node);
611 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
614 return true;
617 /* Create SLP_TREE nodes for the definition node/s. */
618 if (first_stmt_dt0 == vect_internal_def)
620 slp_tree left_node = XNEW (struct _slp_tree);
621 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
622 SLP_TREE_VEC_STMTS (left_node) = NULL;
623 SLP_TREE_LEFT (left_node) = NULL;
624 SLP_TREE_RIGHT (left_node) = NULL;
625 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
626 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
627 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
628 inside_cost, outside_cost, ncopies_for_cost,
629 max_nunits, load_permutation, loads,
630 vectorization_factor))
631 return false;
633 SLP_TREE_LEFT (*node) = left_node;
636 if (first_stmt_dt1 == vect_internal_def)
638 slp_tree right_node = XNEW (struct _slp_tree);
639 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
640 SLP_TREE_VEC_STMTS (right_node) = NULL;
641 SLP_TREE_LEFT (right_node) = NULL;
642 SLP_TREE_RIGHT (right_node) = NULL;
643 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
644 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
645 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
646 inside_cost, outside_cost, ncopies_for_cost,
647 max_nunits, load_permutation, loads,
648 vectorization_factor))
649 return false;
651 SLP_TREE_RIGHT (*node) = right_node;
654 return true;
658 static void
659 vect_print_slp_tree (slp_tree node)
661 int i;
662 gimple stmt;
664 if (!node)
665 return;
667 fprintf (vect_dump, "node ");
668 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
670 fprintf (vect_dump, "\n\tstmt %d ", i);
671 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
673 fprintf (vect_dump, "\n");
675 vect_print_slp_tree (SLP_TREE_LEFT (node));
676 vect_print_slp_tree (SLP_TREE_RIGHT (node));
680 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
681 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
682 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
683 stmts in NODE are to be marked. */
685 static void
686 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
688 int i;
689 gimple stmt;
691 if (!node)
692 return;
694 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
695 if (j < 0 || i == j)
696 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
698 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
699 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
703 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
705 static void
706 vect_mark_slp_stmts_relevant (slp_tree node)
708 int i;
709 gimple stmt;
710 stmt_vec_info stmt_info;
712 if (!node)
713 return;
715 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
717 stmt_info = vinfo_for_stmt (stmt);
718 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
719 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
720 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
723 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
724 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
728 /* Check if the permutation required by the SLP INSTANCE is supported.
729 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */

/* NOTE: SLP_INSTANCE_LOAD_PERMUTATION holds GROUP_SIZE entries per load
   node (all equal for a supported permutation), so the loop below steps
   through it GROUP_SIZE entries at a time while walking the load nodes in
   parallel.  */
731 static bool
732 vect_supported_slp_permutation_p (slp_instance instance)
734 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
735 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
736 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
737 VEC (slp_tree, heap) *sorted_loads = NULL;
738 int index;
739 slp_tree *tmp_loads = NULL;
740 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
741 slp_tree load;
743 /* FORNOW: The only supported loads permutation is loads from the same
744 location in all the loads in the node, when the data-refs in
745 nodes of LOADS constitute an interleaving chain.
746 Sort the nodes according to the order of accesses in the chain. */
747 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
748 for (i = 0, j = 0;
749 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
750 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
751 i += group_size, j++)
753 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
754 /* Check that the loads are all in the same interleaving chain. */
755 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
757 if (vect_print_dump_info (REPORT_DETAILS))
759 fprintf (vect_dump, "Build SLP failed: unsupported data "
760 "permutation ");
761 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
764 free (tmp_loads);
765 return false;
/* Place the node at its position in the interleaving chain; INDEX is
   the chain position all of this node's loads share.  */
768 tmp_loads[index] = load;
771 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
772 for (i = 0; i < group_size; i++)
773 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
775 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
776 SLP_INSTANCE_LOADS (instance) = sorted_loads;
777 free (tmp_loads);
/* Finally verify (analysis-only call) that the permutation itself can be
   generated at transform time.  */
779 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
780 SLP_INSTANCE_UNROLLING_FACTOR (instance),
781 instance, true))
782 return false;
784 return true;
788 /* Check if the required load permutation is supported.
789 LOAD_PERMUTATION contains a list of indices of the loads.
790 In SLP this permutation is relative to the order of strided stores that are
791 the base of the SLP instance. */

/* The only permutation accepted here is one that is constant within each
   consecutive GROUP_SIZE-sized chunk (i.e. of the form 0..01..1..), with
   the whole vector exactly GROUP_SIZE * GROUP_SIZE entries long; the final
   decision is delegated to vect_supported_slp_permutation_p.  */
793 static bool
794 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
795 VEC (int, heap) *load_permutation)
797 int i = 0, j, prev = -1, next, k;
798 bool supported;
800 /* FORNOW: permutations are only supported in SLP. */
801 if (!slp_instn)
802 return false;
804 if (vect_print_dump_info (REPORT_SLP))
806 fprintf (vect_dump, "Load permutation ");
807 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
808 fprintf (vect_dump, "%d ", next);
811 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
812 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
813 well. */
814 if (VEC_length (int, load_permutation)
815 != (unsigned int) (group_size * group_size))
816 return false;
818 supported = true;
819 for (j = 0; j < group_size; j++)
821 for (i = j * group_size, k = 0;
822 VEC_iterate (int, load_permutation, i, next) && k < group_size;
823 i++, k++)
/* Within a chunk, every entry must repeat the chunk's first entry
   (PREV); the first entry of each chunk just (re)sets PREV.  */
825 if (i != j * group_size && next != prev)
827 supported = false;
828 break;
831 prev = next;
/* I == GROUP_SIZE * GROUP_SIZE only when every chunk was scanned to the
   end without an early break.  */
835 if (supported && i == group_size * group_size
836 && vect_supported_slp_permutation_p (slp_instn))
837 return true;
839 return false;
843 /* Find the first load in the loop that belongs to INSTANCE.
844 When loads are in several SLP nodes, there can be a case in which the first
845 load does not appear in the first SLP node to be transformed, causing
846 incorrect order of statements. Since we generate all the loads together,
847 they must be inserted before the first load of the SLP instance and not
848 before the first load of the first node of the instance. */
849 static gimple
850 vect_find_first_load_in_slp_instance (slp_instance instance)
852 int i, j;
853 slp_tree load_node;
854 gimple first_load = NULL, load;
856 for (i = 0;
857 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
858 i++)
859 for (j = 0;
860 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
861 j++)
862 first_load = get_earlier_stmt (load, first_load);
864 return first_load;
868 /* Analyze an SLP instance starting from a group of strided stores. Call
869 vect_build_slp_tree to build a tree of packed stmts if possible.
870 Return FALSE if it's impossible to SLP any stmt in the loop. */

/* On success the new instance is pushed onto the loop's (or basic block's)
   SLP_INSTANCES vector and TRUE is returned; on any failure all memory
   allocated here is released before returning FALSE.  */
872 static bool
873 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
874 gimple stmt)
876 slp_instance new_instance;
877 slp_tree node = XNEW (struct _slp_tree);
878 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
879 unsigned int unrolling_factor = 1, nunits;
880 tree vectype, scalar_type;
881 gimple next;
882 unsigned int vectorization_factor = 0, ncopies;
883 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
884 unsigned int max_nunits = 0;
885 VEC (int, heap) *load_permutation;
886 VEC (slp_tree, heap) *loads;
888 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
889 vinfo_for_stmt (stmt))));
890 vectype = get_vectype_for_scalar_type (scalar_type);
891 if (!vectype)
893 if (vect_print_dump_info (REPORT_SLP))
895 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
896 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
898 return false;
901 nunits = TYPE_VECTOR_SUBPARTS (vectype);
902 if (loop_vinfo)
903 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
904 else
905 /* No multitypes in BB SLP. */
906 vectorization_factor = nunits;
908 ncopies = vectorization_factor / nunits;
910 /* Calculate the unrolling factor. */
911 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
912 if (unrolling_factor != 1 && !loop_vinfo)
914 if (vect_print_dump_info (REPORT_SLP))
915 fprintf (vect_dump, "Build SLP failed: unrolling required in BB SLP");
917 return false;
920 /* Create a node (a root of the SLP tree) for the packed strided stores. */
921 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
922 next = stmt;
923 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
924 while (next)
926 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
927 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
930 SLP_TREE_VEC_STMTS (node) = NULL;
931 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
932 SLP_TREE_LEFT (node) = NULL;
933 SLP_TREE_RIGHT (node) = NULL;
934 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
935 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
937 /* Calculate the number of vector stmts to create based on the unrolling
938 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
939 GROUP_SIZE / NUNITS otherwise. */
940 ncopies_for_cost = unrolling_factor * group_size / nunits;
/* Sized for up to GROUP_SIZE load nodes contributing GROUP_SIZE entries
   each — see vect_supported_load_permutation_p.  */
942 load_permutation = VEC_alloc (int, heap, group_size * group_size);
943 loads = VEC_alloc (slp_tree, heap, group_size);
945 /* Build the tree for the SLP instance. */
946 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
947 &inside_cost, &outside_cost, ncopies_for_cost,
948 &max_nunits, &load_permutation, &loads,
949 vectorization_factor))
951 /* Create a new SLP instance. */
952 new_instance = XNEW (struct _slp_instance);
953 SLP_INSTANCE_TREE (new_instance) = node;
954 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
955 /* Calculate the unrolling factor based on the smallest type in the
956 loop. */
957 if (max_nunits > nunits)
958 unrolling_factor = least_common_multiple (max_nunits, group_size)
959 / group_size;
961 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
962 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
963 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
964 SLP_INSTANCE_LOADS (new_instance) = loads;
965 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
966 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* LOADS only contains load nodes that need permutation (see
   vect_build_slp_tree), so a non-empty vector means a permutation
   must be validated.  */
967 if (VEC_length (slp_tree, loads))
969 if (!vect_supported_load_permutation_p (new_instance, group_size,
970 load_permutation))
972 if (vect_print_dump_info (REPORT_SLP))
974 fprintf (vect_dump, "Build SLP failed: unsupported load "
975 "permutation ");
976 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* NODE, LOADS and LOAD_PERMUTATION are now owned by NEW_INSTANCE,
   so freeing the instance releases them too.  */
979 vect_free_slp_instance (new_instance);
980 return false;
983 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
984 = vect_find_first_load_in_slp_instance (new_instance);
986 else
987 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
989 if (loop_vinfo)
990 VEC_safe_push (slp_instance, heap,
991 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
992 new_instance);
993 else
994 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
995 new_instance);
997 if (vect_print_dump_info (REPORT_SLP))
998 vect_print_slp_tree (node);
1000 return true;
1003 /* Failed to SLP. */
1004 /* Free the allocated memory. */
1005 vect_free_slp_tree (node);
1006 VEC_free (int, heap, load_permutation);
1007 VEC_free (slp_tree, heap, loads);
1009 return false;
1013 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1014 trees of packed scalar stmts if SLP is possible. */
1016 bool
1017 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1019 unsigned int i;
1020 VEC (gimple, heap) *strided_stores;
1021 gimple store;
1022 bool ok = false;
1024 if (vect_print_dump_info (REPORT_SLP))
1025 fprintf (vect_dump, "=== vect_analyze_slp ===");
1027 if (loop_vinfo)
1028 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1029 else
1030 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1032 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1033 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1034 ok = true;
1036 if (bb_vinfo && !ok)
1038 if (vect_print_dump_info (REPORT_SLP))
1039 fprintf (vect_dump, "Failed to SLP the basic block.");
1041 return false;
1044 return true;
1048 /* For each possible SLP instance decide whether to SLP it and calculate overall
1049 unrolling factor needed to SLP the loop. */
1051 void
1052 vect_make_slp_decision (loop_vec_info loop_vinfo)
1054 unsigned int i, unrolling_factor = 1;
1055 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1056 slp_instance instance;
1057 int decided_to_slp = 0;
1059 if (vect_print_dump_info (REPORT_SLP))
1060 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1062 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1064 /* FORNOW: SLP if you can. */
1065 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1066 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1068 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1069 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1070 loop-based vectorization. Such stmts will be marked as HYBRID. */
1071 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1072 decided_to_slp++;
1075 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1077 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1078 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1079 decided_to_slp, unrolling_factor);
1083 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1084 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */

/* A PURE_SLP stmt whose SSA lhs (gimple_op (stmt, 0)) is also used by a
   relevant stmt that is not part of any SLP instance must additionally be
   vectorized by the loop-based vectorizer; vect_mark_slp_stmts is invoked
   with index I so that only that stmt of NODE is demoted to HYBRID.  */
1086 static void
1087 vect_detect_hybrid_slp_stmts (slp_tree node)
1089 int i;
1090 gimple stmt;
1091 imm_use_iterator imm_iter;
1092 gimple use_stmt;
1094 if (!node)
1095 return;
1097 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1098 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1099 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1100 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1101 if (vinfo_for_stmt (use_stmt)
1102 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
1103 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
1104 vect_mark_slp_stmts (node, hybrid, i);
1106 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1107 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1111 /* Find stmts that must be both vectorized and SLPed. */
1113 void
1114 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1116 unsigned int i;
1117 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1118 slp_instance instance;
1120 if (vect_print_dump_info (REPORT_SLP))
1121 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1123 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1124 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  Returns the new struct;
   ownership passes to the caller (freed by destroy_bb_vec_info).  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  /* xcalloc zero-fills, so every field starts out NULL/0.  */
  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  /* Attach a fresh stmt_vec_info to each stmt in the block and reset its
     uid.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  /* Make the vec-info reachable from the basic block itself.  */
  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  Safe to call with NULL.  Clears bb->aux set by
   new_bb_vec_info.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  /* Release the per-stmt info attached by new_bb_vec_info.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  /* An absent child is trivially OK.  */
  if (!node)
    return true;

  /* Postorder: operands must be analyzable before this node is.  */
  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      /* BB vectorization marked all SLP stmts pure-SLP beforehand.  */
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  Unsupported instances are freed and removed from
   BB_VINFO's instance vector as a side effect.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  /* NOTE: I is advanced only when the instance is kept; on removal the
     following elements shift down into slot I, so the same index is
     revisited.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  /* Fail if no instance survived.  */
  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  */
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Cheap size gate before any analysis: count the stmts in the block.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    insns++;

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Each analysis phase below bails out with a diagnostic, destroying the
     partially-built bb_vinfo so nothing leaks on failure.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
                            " block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BB will be vectorized using SLP\n");

  return bb_vinfo;
}
1376 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1377 the number of created vector stmts depends on the unrolling factor). However,
1378 the actual number of vector stmts for every SLP node depends on VF which is
1379 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1380 In this function we assume that the inside costs calculated in
1381 vect_model_xxx_cost are linear in ncopies. */
1383 void
1384 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1386 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1387 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1388 slp_instance instance;
1390 if (vect_print_dump_info (REPORT_SLP))
1391 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1393 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1394 /* We assume that costs are linear in ncopies. */
1395 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1396 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* A data ref on the first stmt means this is a group of stores: the
     operand of interest is always rhs1; otherwise pick operand OP_NUM.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  /* Constants can be packed into VECTYPE directly; invariants need the
     vector type of their own scalar type and a CONSTRUCTOR.  */
  if (CONSTANT_CLASS_P (op))
    {
      vector_type = vectype;
      constant_p = true;
    }
  else
    {
      vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
      gcc_assert (vector_type);
      constant_p = false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  /* Walk the scalar stmts backwards, consing operands onto T; every time a
     vector's worth has accumulated, emit one vector def.  */
  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);
          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
1521 /* Get vectorized definitions from SLP_NODE that contains corresponding
1522 vectorized def-stmts. */
1524 static void
1525 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1527 tree vec_oprnd;
1528 gimple vec_def_stmt;
1529 unsigned int i;
1531 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1533 for (i = 0;
1534 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1535 i++)
1537 gcc_assert (vec_def_stmt);
1538 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1539 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain
   scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.  */
  if (SLP_TREE_LEFT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* Only binary operations have a second operand to gather.  */
  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           int *mask_array, int mask_nunits,
                           tree mask_element_type, tree mask_type,
                           int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree t = NULL_TREE, mask_vec, mask, perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, group_size, stride, dr_chain_size;
  tree first_vec, second_vec, data_ref;
  VEC (tree, heap) *params = NULL;

  /* Create a vector mask.  Elements are consed in reverse so the list ends
     up in index order.  */
  for (i = mask_nunits - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, build_int_cst (mask_element_type, mask_array[i]),
                   t);
  mask_vec = build_vector (mask_type, t);
  mask = vect_init_vector (stmt, mask_vec, mask_type, NULL);

  group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node));
  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
  dr_chain_size = VEC_length (tree, dr_chain);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Build argument list for the vectorized call.  */
      VEC_free (tree, heap, params);
      params = VEC_alloc (tree, heap, 3);
      VEC_quick_push (tree, params, first_vec);
      VEC_quick_push (tree, params, second_vec);
      VEC_quick_push (tree, params, mask);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      /* Next copy uses the next pair of vectors in the chain.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   NOTE: this function keeps state in function-local statics across calls
   (reset when the last element of a mask is processed), so it is not
   reentrant and assumes masks are built element-by-element in order.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      /* Retroactively shift the elements already emitted for this mask.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
1779 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1780 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1781 permute statements for SLP_NODE_INSTANCE. */
1782 bool
1783 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1784 gimple_stmt_iterator *gsi, int vf,
1785 slp_instance slp_node_instance, bool analyze_only)
1787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1788 tree mask_element_type = NULL_TREE, mask_type;
1789 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1790 slp_tree node;
1791 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1792 gimple next_scalar_stmt;
1793 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1794 int first_mask_element;
1795 int index, unroll_factor, *mask, current_mask_element, ncopies;
1796 bool only_one_vec = false, need_next_vector = false;
1797 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1799 if (!targetm.vectorize.builtin_vec_perm)
1801 if (vect_print_dump_info (REPORT_DETAILS))
1803 fprintf (vect_dump, "no builtin for vect permute for ");
1804 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1807 return false;
1810 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1811 &mask_element_type);
1812 if (!builtin_decl || !mask_element_type)
1814 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "no builtin for vect permute for ");
1817 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1820 return false;
1823 mask_type = get_vectype_for_scalar_type (mask_element_type);
1824 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1825 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1826 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1827 scale = mask_nunits / nunits;
1828 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1830 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1831 unrolling factor. */
1832 orig_vec_stmts_num = group_size *
1833 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1834 if (orig_vec_stmts_num == 1)
1835 only_one_vec = true;
1837 /* Number of copies is determined by the final vectorization factor
1838 relatively to SLP_NODE_INSTANCE unrolling factor. */
1839 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1841 /* Generate permutation masks for every NODE. Number of masks for each NODE
1842 is equal to GROUP_SIZE.
1843 E.g., we have a group of three nodes with three loads from the same
1844 location in each node, and the vector size is 4. I.e., we have a
1845 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1846 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1847 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1850 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1851 scpecific type, e.g., in bytes for Altivec.
1852 The last mask is illegal since we assume two operands for permute
1853 operation, and the mask element values can't be outside that range. Hence,
1854 the last mask must be converted into {2,5,5,5}.
1855 For the first two permutations we need the first and the second input
1856 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1857 we need the second and the third vectors: {b1,c1,a2,b2} and
1858 {c2,a3,b3,c3}. */
1860 for (i = 0;
1861 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1862 i, node);
1863 i++)
1865 scalar_index = 0;
1866 index = 0;
1867 vect_stmts_counter = 0;
1868 vec_index = 0;
1869 first_vec_index = vec_index++;
1870 if (only_one_vec)
1871 second_vec_index = first_vec_index;
1872 else
1873 second_vec_index = vec_index++;
1875 for (j = 0; j < unroll_factor; j++)
1877 for (k = 0; k < group_size; k++)
1879 first_mask_element = (i + j * group_size) * scale;
1880 for (m = 0; m < scale; m++)
1882 if (!vect_get_mask_element (stmt, first_mask_element, m,
1883 mask_nunits, only_one_vec, index, mask,
1884 &current_mask_element, &need_next_vector))
1885 return false;
1887 mask[index++] = current_mask_element;
1890 if (index == mask_nunits)
1892 index = 0;
1893 if (!analyze_only)
1895 if (need_next_vector)
1897 first_vec_index = second_vec_index;
1898 second_vec_index = vec_index;
1901 next_scalar_stmt = VEC_index (gimple,
1902 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1904 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1905 mask, mask_nunits, mask_element_type, mask_type,
1906 first_vec_index, second_vec_index, gsi, node,
1907 builtin_decl, vectype, dr_chain, ncopies,
1908 vect_stmts_counter++);
1915 free (mask);
1916 return true;
/* Vectorize SLP instance tree in postorder.  Returns TRUE if a store group
   was vectorized (and its scalar stores removed), FALSE otherwise.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  /* Postorder: vectorize operands before this node.  */
  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i,
                        loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  if (is_store)
    {
      if (DR_GROUP_FIRST_DR (stmt_info))
        /* If IS_STORE is TRUE, the vectorization of the
           interleaving chain was completed - free all the stores in
           the chain.  */
        vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
      else
        /* FORNOW: SLP originates only from strided stores.  */
        gcc_unreachable ();

      return true;
    }

  /* FORNOW: SLP originates only from strided stores.  */
  return false;
}
2016 bool
2017 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2019 VEC (slp_instance, heap) *slp_instances;
2020 slp_instance instance;
2021 unsigned int i, vf;
2022 bool is_store = false;
2024 if (loop_vinfo)
2026 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2027 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2029 else
2031 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2032 vf = 1;
2035 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2037 /* Schedule the tree of INSTANCE. */
2038 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2039 instance, vf);
2040 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2041 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2042 fprintf (vect_dump, "vectorizing stmts using SLP.");
2045 return is_store;
/* Vectorize the basic block.  BB must have been analyzed by
   vect_slp_analyze_bb, whose bb_vec_info is consumed and destroyed here.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BB VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}