2010-06-02 Richard Guenther <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-slp.c
blob432392df0d92617af7c597f5438c4f03dd261c72
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
44 LOC
45 find_bb_location (basic_block bb)
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
50 if (!bb)
51 return UNKNOWN_LOC;
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
60 return UNKNOWN_LOC;
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
66 static void
67 vect_free_slp_tree (slp_tree node)
69 if (!node)
70 return;
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
83 free (node);
87 /* Free the memory allocated for the SLP instance. */
89 void
90 vect_free_slp_instance (slp_instance instance)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
102 static bool
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
115 tree oprnd;
116 unsigned int i, number_of_oprnds;
117 tree def;
118 gimple def_stmt;
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
125 if (loop_vinfo)
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
/* Walk all RHS operands of STMT, classifying each one's definition and
   comparing it against what was recorded for the group's first stmt.  */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
136 &dt[i])
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
145 return false;
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
150 pattern. */
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
/* PATTERN0/PATTERN1 track whether operand 0/1 of the group's first
   stmt was defined inside a recognized pattern; later stmts must
   agree, otherwise the group is rejected.  */
156 if (!*first_stmt_dt0)
157 *pattern0 = true;
158 else
160 if (i == 1 && !*first_stmt_dt1)
161 *pattern1 = true;
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM)
171 return false;
/* Replace the def stmt by the pattern's related stmt and re-read
   its def type.  */
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
182 return false;
/* Extract the value defined by the (possibly pattern-replaced)
   def stmt; only PHIs and assignments are supported.  */
185 switch (gimple_code (def_stmt))
187 case GIMPLE_PHI:
188 def = gimple_phi_result (def_stmt);
189 break;
191 case GIMPLE_ASSIGN:
192 def = gimple_assign_lhs (def_stmt);
193 break;
195 default:
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
198 return false;
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
206 if (def)
207 *first_stmt_def0_type = TREE_TYPE (def);
208 else
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215 else
216 /* Store. */
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
220 else
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
226 if (def)
227 *first_stmt_def1_type = TREE_TYPE (def);
228 else
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
236 "oprnds in stmt");
237 return false;
239 *first_stmt_const_oprnd = oprnd;
242 else
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
246 if ((i == 0
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && !types_compatible_p (*first_stmt_def0_type,
250 TREE_TYPE (def)))))
251 || (i == 1
252 && (*first_stmt_dt1 != dt[i]
253 || (*first_stmt_def1_type && def
254 && !types_compatible_p (*first_stmt_def1_type,
255 TREE_TYPE (def)))))
256 || (!def
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
258 TREE_TYPE (oprnd))))
260 if (vect_print_dump_info (REPORT_SLP))
261 fprintf (vect_dump, "Build SLP failed: different types ");
263 return false;
268 /* Check the types of the definitions. */
269 switch (dt[i])
271 case vect_constant_def:
272 case vect_external_def:
/* Constants and defs from outside the vectorized region need no
   def stmt to be collected.  */
273 break;
275 case vect_internal_def:
/* Internal defs are collected so the SLP tree can recurse into
   them (operand 0 -> DEF_STMTS0, operand 1 -> DEF_STMTS1).  */
276 if (i == 0)
277 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
278 else
279 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
280 break;
282 default:
283 /* FORNOW: Not supported. */
284 if (vect_print_dump_info (REPORT_SLP))
286 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
287 print_generic_expr (vect_dump, def, TDF_SLIM);
290 return false;
294 return true;
298 /* Recursively build an SLP tree starting from NODE.
299 Fail (and return FALSE) if def-stmts are not isomorphic, require data
300 permutation or are of unsupported types of operation. Otherwise, return
301 TRUE. */
303 static bool
304 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
305 slp_tree *node, unsigned int group_size,
306 int *inside_cost, int *outside_cost,
307 int ncopies_for_cost, unsigned int *max_nunits,
308 VEC (int, heap) **load_permutation,
309 VEC (slp_tree, heap) **loads,
310 unsigned int vectorization_factor)
312 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
313 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
314 unsigned int i;
315 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
316 gimple stmt = VEC_index (gimple, stmts, 0);
317 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
318 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
319 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
320 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
321 tree lhs;
322 bool stop_recursion = false, need_same_oprnds = false;
323 tree vectype, scalar_type, first_op1 = NULL_TREE;
324 unsigned int ncopies;
325 optab optab;
326 int icode;
327 enum machine_mode optab_op2_mode;
328 enum machine_mode vec_mode;
329 tree first_stmt_const_oprnd = NULL_TREE;
330 struct data_reference *first_dr;
331 bool pattern0 = false, pattern1 = false;
332 HOST_WIDE_INT dummy;
333 bool permutation = false;
334 unsigned int load_place;
335 gimple first_load;
337 /* For every stmt in NODE find its def stmt/s. */
338 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
340 if (vect_print_dump_info (REPORT_SLP))
342 fprintf (vect_dump, "Build SLP for ");
343 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Only assignments and calls (stmts with an lhs) can be packed.  */
346 lhs = gimple_get_lhs (stmt);
347 if (lhs == NULL_TREE)
349 if (vect_print_dump_info (REPORT_SLP))
351 fprintf (vect_dump,
352 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
353 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
356 return false;
359 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
360 vectype = get_vectype_for_scalar_type (scalar_type);
361 if (!vectype)
363 if (vect_print_dump_info (REPORT_SLP))
365 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
366 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
368 return false;
371 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
372 if (ncopies != 1)
374 if (vect_print_dump_info (REPORT_SLP))
375 fprintf (vect_dump, "SLP with multiple types ");
377 /* FORNOW: multiple types are unsupported in BB SLP. */
378 if (bb_vinfo)
379 return false;
382 /* In case of multiple types we need to detect the smallest type. */
383 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
384 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
386 if (is_gimple_call (stmt))
387 rhs_code = CALL_EXPR;
388 else
389 rhs_code = gimple_assign_rhs_code (stmt);
391 /* Check the operation. */
392 if (i == 0)
394 first_stmt_code = rhs_code;
396 /* Shift arguments should be equal in all the packed stmts for a
397 vector shift with scalar shift operand. */
398 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
399 || rhs_code == LROTATE_EXPR
400 || rhs_code == RROTATE_EXPR)
402 vec_mode = TYPE_MODE (vectype);
404 /* First see if we have a vector/vector shift. */
405 optab = optab_for_tree_code (rhs_code, vectype,
406 optab_vector);
408 if (!optab
409 || (optab->handlers[(int) vec_mode].insn_code
410 == CODE_FOR_nothing))
412 /* No vector/vector shift, try for a vector/scalar shift. */
413 optab = optab_for_tree_code (rhs_code, vectype,
414 optab_scalar);
416 if (!optab)
418 if (vect_print_dump_info (REPORT_SLP))
419 fprintf (vect_dump, "Build SLP failed: no optab.");
420 return false;
422 icode = (int) optab->handlers[(int) vec_mode].insn_code;
423 if (icode == CODE_FOR_nothing)
425 if (vect_print_dump_info (REPORT_SLP))
426 fprintf (vect_dump, "Build SLP failed: "
427 "op not supported by target.");
428 return false;
430 optab_op2_mode = insn_data[icode].operand[2].mode;
431 if (!VECTOR_MODE_P (optab_op2_mode))
/* Vector/scalar shift: all group members must use the very
   same shift amount; remember it for the comparison below.  */
433 need_same_oprnds = true;
434 first_op1 = gimple_assign_rhs2 (stmt);
439 else
/* Not the first stmt: its operation must match the first stmt's
   (REALPART_EXPR/IMAGPART_EXPR are allowed to mix).  */
441 if (first_stmt_code != rhs_code
442 && (first_stmt_code != IMAGPART_EXPR
443 || rhs_code != REALPART_EXPR)
444 && (first_stmt_code != REALPART_EXPR
445 || rhs_code != IMAGPART_EXPR))
447 if (vect_print_dump_info (REPORT_SLP))
449 fprintf (vect_dump,
450 "Build SLP failed: different operation in stmt ");
451 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
454 return false;
457 if (need_same_oprnds
458 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
460 if (vect_print_dump_info (REPORT_SLP))
462 fprintf (vect_dump,
463 "Build SLP failed: different shift arguments in ");
464 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
467 return false;
471 /* Strided store or load. */
472 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
474 if (REFERENCE_CLASS_P (lhs))
476 /* Store. */
477 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
478 stmt, &def_stmts0, &def_stmts1,
479 &first_stmt_dt0,
480 &first_stmt_dt1,
481 &first_stmt_def0_type,
482 &first_stmt_def1_type,
483 &first_stmt_const_oprnd,
484 ncopies_for_cost,
485 &pattern0, &pattern1))
486 return false;
488 else
490 /* Load. */
491 /* FORNOW: Check that there is no gap between the loads. */
492 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
493 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
494 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
495 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
497 if (vect_print_dump_info (REPORT_SLP))
499 fprintf (vect_dump, "Build SLP failed: strided "
500 "loads have gaps ");
501 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
504 return false;
507 /* Check that the size of interleaved loads group is not
508 greater than the SLP group size. */
509 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
510 > ncopies * group_size)
512 if (vect_print_dump_info (REPORT_SLP))
514 fprintf (vect_dump, "Build SLP failed: the number of "
515 "interleaved loads is greater than"
516 " the SLP group size ");
517 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
520 return false;
523 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
525 if (first_load == stmt)
/* Alignment is checked (and load cost modeled) once per
   interleaving group, on its first load.  */
527 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
528 if (vect_supportable_dr_alignment (first_dr)
529 == dr_unaligned_unsupported)
531 if (vect_print_dump_info (REPORT_SLP))
533 fprintf (vect_dump, "Build SLP failed: unsupported "
534 "unaligned load ");
535 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
538 return false;
541 /* Analyze costs (for the first stmt in the group). */
542 vect_model_load_cost (vinfo_for_stmt (stmt),
543 ncopies_for_cost, *node);
546 /* Store the place of this load in the interleaving chain. In
547 case that permutation is needed we later decide if a specific
548 permutation is supported. */
549 load_place = vect_get_place_in_interleaving_chain (stmt,
550 first_load);
551 if (load_place != i)
552 permutation = true;
554 VEC_safe_push (int, heap, *load_permutation, load_place);
556 /* We stop the tree when we reach a group of loads. */
557 stop_recursion = true;
558 continue;
560 } /* Strided access. */
561 else
563 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
565 /* Not strided load. */
566 if (vect_print_dump_info (REPORT_SLP))
568 fprintf (vect_dump, "Build SLP failed: not strided load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 /* FORNOW: Not strided loads are not supported. */
573 return false;
576 /* Not memory operation. */
577 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
578 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
580 if (vect_print_dump_info (REPORT_SLP))
582 fprintf (vect_dump, "Build SLP failed: operation");
583 fprintf (vect_dump, " unsupported ");
584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
587 return false;
590 /* Find the def-stmts. */
591 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
592 &def_stmts0, &def_stmts1,
593 &first_stmt_dt0, &first_stmt_dt1,
594 &first_stmt_def0_type,
595 &first_stmt_def1_type,
596 &first_stmt_const_oprnd,
597 ncopies_for_cost,
598 &pattern0, &pattern1))
599 return false;
603 /* Add the costs of the node to the overall instance costs. */
604 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
605 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
607 /* Strided loads were reached - stop the recursion. */
608 if (stop_recursion)
610 if (permutation)
/* Remember the node so the instance can later verify whether the
   required load permutation is actually supported.  */
612 VEC_safe_push (slp_tree, heap, *loads, *node);
613 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
616 return true;
619 /* Create SLP_TREE nodes for the definition node/s. */
620 if (first_stmt_dt0 == vect_internal_def)
622 slp_tree left_node = XNEW (struct _slp_tree);
623 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
624 SLP_TREE_VEC_STMTS (left_node) = NULL;
625 SLP_TREE_LEFT (left_node) = NULL;
626 SLP_TREE_RIGHT (left_node) = NULL;
627 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
628 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
629 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
630 inside_cost, outside_cost, ncopies_for_cost,
631 max_nunits, load_permutation, loads,
632 vectorization_factor))
633 return false;
635 SLP_TREE_LEFT (*node) = left_node;
638 if (first_stmt_dt1 == vect_internal_def)
640 slp_tree right_node = XNEW (struct _slp_tree);
641 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
642 SLP_TREE_VEC_STMTS (right_node) = NULL;
643 SLP_TREE_LEFT (right_node) = NULL;
644 SLP_TREE_RIGHT (right_node) = NULL;
645 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
646 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
647 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
648 inside_cost, outside_cost, ncopies_for_cost,
649 max_nunits, load_permutation, loads,
650 vectorization_factor))
651 return false;
653 SLP_TREE_RIGHT (*node) = right_node;
656 return true;
660 static void
661 vect_print_slp_tree (slp_tree node)
663 int i;
664 gimple stmt;
666 if (!node)
667 return;
669 fprintf (vect_dump, "node ");
670 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
672 fprintf (vect_dump, "\n\tstmt %d ", i);
673 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
675 fprintf (vect_dump, "\n");
677 vect_print_slp_tree (SLP_TREE_LEFT (node));
678 vect_print_slp_tree (SLP_TREE_RIGHT (node));
682 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685 stmts in NODE are to be marked. */
687 static void
688 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
690 int i;
691 gimple stmt;
693 if (!node)
694 return;
696 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
697 if (j < 0 || i == j)
698 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
700 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
705 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
707 static void
708 vect_mark_slp_stmts_relevant (slp_tree node)
710 int i;
711 gimple stmt;
712 stmt_vec_info stmt_info;
714 if (!node)
715 return;
717 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
719 stmt_info = vinfo_for_stmt (stmt);
720 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
725 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
730 /* Check if the permutation required by the SLP INSTANCE is supported.
731 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
733 static bool
734 vect_supported_slp_permutation_p (slp_instance instance)
736 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
737 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
738 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
739 VEC (slp_tree, heap) *sorted_loads = NULL;
740 int index;
741 slp_tree *tmp_loads = NULL;
742 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
743 slp_tree load;
745 /* FORNOW: The only supported loads permutation is loads from the same
746 location in all the loads in the node, when the data-refs in
747 nodes of LOADS constitute an interleaving chain.
748 Sort the nodes according to the order of accesses in the chain. */
749 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Iterate the permutation vector in steps of GROUP_SIZE (one step per
   load node, since each node contributes GROUP_SIZE equal indices) in
   lockstep with the load nodes themselves.  */
750 for (i = 0, j = 0;
751 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
752 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
753 i += group_size, j++)
755 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
756 /* Check that the loads are all in the same interleaving chain. */
757 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
759 if (vect_print_dump_info (REPORT_DETAILS))
761 fprintf (vect_dump, "Build SLP failed: unsupported data "
762 "permutation ");
763 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
766 free (tmp_loads);
767 return false;
/* Place the node at its position in the interleaving chain.  */
770 tmp_loads[index] = load;
/* Rebuild SLP_INSTANCE_LOADS in chain order from the scratch array.  */
773 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
774 for (i = 0; i < group_size; i++)
775 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
777 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
778 SLP_INSTANCE_LOADS (instance) = sorted_loads;
779 free (tmp_loads);
/* Do a dry-run of the permuted load generation to verify the target
   can actually perform it.  */
781 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
782 SLP_INSTANCE_UNROLLING_FACTOR (instance),
783 instance, true))
784 return false;
786 return true;
790 /* Check if the required load permutation is supported.
791 LOAD_PERMUTATION contains a list of indices of the loads.
792 In SLP this permutation is relative to the order of strided stores that are
793 the base of the SLP instance. */
795 static bool
796 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797 VEC (int, heap) *load_permutation)
799 int i = 0, j, prev = -1, next, k;
800 bool supported;
801 sbitmap load_index;
803 /* FORNOW: permutations are only supported in SLP. */
804 if (!slp_instn)
805 return false;
807 if (vect_print_dump_info (REPORT_SLP))
809 fprintf (vect_dump, "Load permutation ");
810 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811 fprintf (vect_dump, "%d ", next);
814 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
816 well. */
817 if (VEC_length (int, load_permutation)
818 != (unsigned int) (group_size * group_size))
819 return false;
821 supported = true;
822 load_index = sbitmap_alloc (group_size);
823 sbitmap_zero (load_index);
824 for (j = 0; j < group_size; j++)
826 for (i = j * group_size, k = 0;
827 VEC_iterate (int, load_permutation, i, next) && k < group_size;
828 i++, k++)
830 if (i != j * group_size && next != prev)
832 supported = false;
833 break;
836 prev = next;
839 if (TEST_BIT (load_index, prev))
841 supported = false;
842 break;
845 SET_BIT (load_index, prev);
848 for (j = 0; j < group_size; j++)
849 if (!TEST_BIT (load_index, j))
850 return false;
852 sbitmap_free (load_index);
854 if (supported && i == group_size * group_size
855 && vect_supported_slp_permutation_p (slp_instn))
856 return true;
858 return false;
862 /* Find the first load in the loop that belongs to INSTANCE.
863 When loads are in several SLP nodes, there can be a case in which the first
864 load does not appear in the first SLP node to be transformed, causing
865 incorrect order of statements. Since we generate all the loads together,
866 they must be inserted before the first load of the SLP instance and not
867 before the first load of the first node of the instance. */
868 static gimple
869 vect_find_first_load_in_slp_instance (slp_instance instance)
871 int i, j;
872 slp_tree load_node;
873 gimple first_load = NULL, load;
875 for (i = 0;
876 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
877 i++)
878 for (j = 0;
879 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
880 j++)
881 first_load = get_earlier_stmt (load, first_load);
883 return first_load;
887 /* Analyze an SLP instance starting from a group of strided stores. Call
888 vect_build_slp_tree to build a tree of packed stmts if possible.
889 Return FALSE if it's impossible to SLP any stmt in the loop. */
891 static bool
892 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
893 gimple stmt)
895 slp_instance new_instance;
896 slp_tree node = XNEW (struct _slp_tree);
897 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
898 unsigned int unrolling_factor = 1, nunits;
899 tree vectype, scalar_type;
900 gimple next;
901 unsigned int vectorization_factor = 0;
902 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
903 unsigned int max_nunits = 0;
904 VEC (int, heap) *load_permutation;
905 VEC (slp_tree, heap) *loads;
/* Derive the vector type from the stored data-ref's scalar type.  */
907 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
908 vinfo_for_stmt (stmt))));
909 vectype = get_vectype_for_scalar_type (scalar_type);
910 if (!vectype)
912 if (vect_print_dump_info (REPORT_SLP))
914 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
915 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
917 return false;
920 nunits = TYPE_VECTOR_SUBPARTS (vectype);
921 if (loop_vinfo)
922 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
923 else
924 /* No multitypes in BB SLP. */
925 vectorization_factor = nunits;
927 /* Calculate the unrolling factor. */
928 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
929 if (unrolling_factor != 1 && !loop_vinfo)
931 if (vect_print_dump_info (REPORT_SLP))
932 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
933 " block SLP");
935 return false;
938 /* Create a node (a root of the SLP tree) for the packed strided stores. */
939 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
940 next = stmt;
941 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
942 while (next)
944 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
945 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
948 SLP_TREE_VEC_STMTS (node) = NULL;
949 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
950 SLP_TREE_LEFT (node) = NULL;
951 SLP_TREE_RIGHT (node) = NULL;
952 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
953 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
955 /* Calculate the number of vector stmts to create based on the unrolling
956 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
957 GROUP_SIZE / NUNITS otherwise. */
958 ncopies_for_cost = unrolling_factor * group_size / nunits;
960 load_permutation = VEC_alloc (int, heap, group_size * group_size);
961 loads = VEC_alloc (slp_tree, heap, group_size);
963 /* Build the tree for the SLP instance. */
964 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
965 &inside_cost, &outside_cost, ncopies_for_cost,
966 &max_nunits, &load_permutation, &loads,
967 vectorization_factor))
969 /* Create a new SLP instance.  It takes ownership of NODE, LOADS and
970 LOAD_PERMUTATION built above. */
970 new_instance = XNEW (struct _slp_instance);
971 SLP_INSTANCE_TREE (new_instance) = node;
972 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
973 /* Calculate the unrolling factor based on the smallest type in the
974 loop. */
975 if (max_nunits > nunits)
976 unrolling_factor = least_common_multiple (max_nunits, group_size)
977 / group_size;
979 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
980 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
981 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
982 SLP_INSTANCE_LOADS (new_instance) = loads;
983 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
984 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
985 if (VEC_length (slp_tree, loads))
987 if (!vect_supported_load_permutation_p (new_instance, group_size,
988 load_permutation))
990 if (vect_print_dump_info (REPORT_SLP))
992 fprintf (vect_dump, "Build SLP failed: unsupported load "
993 "permutation ");
994 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
997 vect_free_slp_instance (new_instance);
998 return false;
1001 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1002 = vect_find_first_load_in_slp_instance (new_instance);
1004 else
/* No loads in the instance: the permutation vector is unused.  */
1005 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance on the enclosing loop or basic-block info.  */
1007 if (loop_vinfo)
1008 VEC_safe_push (slp_instance, heap,
1009 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1010 new_instance);
1011 else
1012 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1013 new_instance);
1015 if (vect_print_dump_info (REPORT_SLP))
1016 vect_print_slp_tree (node);
1018 return true;
1021 /* Failed to SLP. */
1022 /* Free the allocated memory. */
1023 vect_free_slp_tree (node);
1024 VEC_free (int, heap, load_permutation);
1025 VEC_free (slp_tree, heap, loads);
1027 return false;
1031 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1032 trees of packed scalar stmts if SLP is possible. */
1034 bool
1035 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1037 unsigned int i;
1038 VEC (gimple, heap) *strided_stores;
1039 gimple store;
1040 bool ok = false;
1042 if (vect_print_dump_info (REPORT_SLP))
1043 fprintf (vect_dump, "=== vect_analyze_slp ===");
1045 if (loop_vinfo)
1046 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1047 else
1048 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1050 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1051 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1052 ok = true;
1054 if (bb_vinfo && !ok)
1056 if (vect_print_dump_info (REPORT_SLP))
1057 fprintf (vect_dump, "Failed to SLP the basic block.");
1059 return false;
1062 return true;
1066 /* For each possible SLP instance decide whether to SLP it and calculate overall
1067 unrolling factor needed to SLP the loop. */
1069 void
1070 vect_make_slp_decision (loop_vec_info loop_vinfo)
1072 unsigned int i, unrolling_factor = 1;
1073 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074 slp_instance instance;
1075 int decided_to_slp = 0;
1077 if (vect_print_dump_info (REPORT_SLP))
1078 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1080 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1082 /* FORNOW: SLP if you can. */
1083 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1084 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1086 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1087 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1088 loop-based vectorization. Such stmts will be marked as HYBRID. */
1089 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1090 decided_to_slp++;
1093 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1095 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1096 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1097 decided_to_slp, unrolling_factor);
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  /* A pure-SLP stmt whose SSA definition is also used by a stmt that is
     not part of any SLP instance but is still relevant (or feeds a
     reduction/induction cycle) must be vectorized by loop-based
     vectorization as well -- mark the whole node HYBRID from position I.  */
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
          vect_mark_slp_stmts (node, hybrid, i);

  /* Recurse into both operand subtrees.  */
  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
1131 /* Find stmts that must be both vectorized and SLPed. */
1133 void
1134 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1136 unsigned int i;
1137 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1138 slp_instance instance;
1140 if (vect_print_dump_info (REPORT_SLP))
1141 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1143 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1144 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1148 /* Create and initialize a new bb_vec_info struct for BB, as well as
1149 stmt_vec_info structs for all the stmts in it. */
1151 static bb_vec_info
1152 new_bb_vec_info (basic_block bb)
1154 bb_vec_info res = NULL;
1155 gimple_stmt_iterator gsi;
1157 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1158 BB_VINFO_BB (res) = bb;
1160 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1162 gimple stmt = gsi_stmt (gsi);
1163 gimple_set_uid (stmt, 0);
1164 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1167 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1168 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1170 bb->aux = res;
1171 return res;
1175 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1176 stmts in the basic block. */
1178 static void
1179 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1181 basic_block bb;
1182 gimple_stmt_iterator si;
1184 if (!bb_vinfo)
1185 return;
1187 bb = BB_VINFO_BB (bb_vinfo);
1189 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1191 gimple stmt = gsi_stmt (si);
1192 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1194 if (stmt_info)
1195 /* Free stmt_vec_info. */
1196 free_stmt_vec_info (stmt);
1199 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1200 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1201 free (bb_vinfo);
1202 bb->aux = NULL;
1206 /* Analyze statements contained in SLP tree node after recursively analyzing
1207 the subtree. Return TRUE if the operations are supported. */
1209 static bool
1210 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1212 bool dummy;
1213 int i;
1214 gimple stmt;
1216 if (!node)
1217 return true;
1219 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1220 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1221 return false;
1223 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226 gcc_assert (stmt_info);
1227 gcc_assert (PURE_SLP_STMT (stmt_info));
1229 if (!vect_analyze_stmt (stmt, &dummy, node))
1230 return false;
1233 return true;
1237 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1238 operations are supported. */
1240 static bool
1241 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1243 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1244 slp_instance instance;
1245 int i;
1247 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1249 if (!vect_slp_analyze_node_operations (bb_vinfo,
1250 SLP_INSTANCE_TREE (instance)))
1252 vect_free_slp_instance (instance);
1253 VEC_ordered_remove (slp_instance, slp_instances, i);
1255 else
1256 i++;
1259 if (!VEC_length (slp_instance, slp_instances))
1260 return false;
1262 return true;
/* Check if the basic block can be vectorized.  Runs the data-reference,
   alignment, dependence and access analyses, builds SLP instances, and
   verifies their operations are supported by the target.  Returns the new
   bb_vec_info on success, NULL (with all intermediate state destroyed)
   on any failure.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count real instructions only -- debug stmts, nops and labels are
     irrelevant to the compile-time cost guard below.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  /* Compile-time guard: refuse blocks bigger than the tunable param.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* At least one data dependence relation is required, otherwise there is
     nothing SLP can profitably do here.  */
  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
                            " block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
1402 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1403 the number of created vector stmts depends on the unrolling factor). However,
1404 the actual number of vector stmts for every SLP node depends on VF which is
1405 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1406 In this function we assume that the inside costs calculated in
1407 vect_model_xxx_cost are linear in ncopies. */
1409 void
1410 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1412 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1413 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1414 slp_instance instance;
1416 if (vect_print_dump_info (REPORT_SLP))
1417 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1419 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1420 /* We assume that costs are linear in ncopies. */
1421 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1422 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts. NUMBER_OF_VECTORS is the number of vector defs to create.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* A stmt with a data reference here is a store; its invariant operand
     is always the rhs.  Otherwise pick the requested operand.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  /* Constant operands become VECTOR_CSTs, invariants become
     CONSTRUCTORs (see the build below).  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Iterate the group in reverse: tree_cons prepends, so walking
         backwards yields the elements in forward order in T.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* A full vector's worth of operands collected: materialize it.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
1542 /* Get vectorized definitions from SLP_NODE that contains corresponding
1543 vectorized def-stmts. */
1545 static void
1546 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1548 tree vec_oprnd;
1549 gimple vec_def_stmt;
1550 unsigned int i;
1552 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1554 for (i = 0;
1555 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1556 i++)
1558 gcc_assert (vec_def_stmt);
1559 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1560 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node. This is used when the second operand must remain scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary. See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations. We don't call this function for loads.  */
  if (SLP_TREE_LEFT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* Only binary operations have a second operand to fetch; the caller may
     also pass VEC_OPRNDS1 == NULL to keep that operand scalar.  */
  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;
  VEC (tree, heap) *params = NULL;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  Pad with NULLs up to the full count so VEC_replace
     below can store at arbitrary positions.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Build argument list for the vectorized call.  */
      VEC_free (tree, heap, params);
      params = VEC_alloc (tree, heap, 3);
      VEC_quick_push (tree, params, first_vec);
      VEC_quick_push (tree, params, second_vec);
      VEC_quick_push (tree, params, mask);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      /* Each subsequent copy reads STRIDE vectors further down DR_CHAIN.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized (the last generated permute stands
     for it).  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation. Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  /* NOTE: these statics carry state between successive calls while one
     mask is being filled; they are reset once INDEX reaches the last
     mask slot (see the end of the function).  Not reentrant.  */
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      /* Rewrite the already-collected elements of this mask as well.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
1789 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1790 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1791 permute statements for SLP_NODE_INSTANCE. */
1792 bool
1793 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1794 gimple_stmt_iterator *gsi, int vf,
1795 slp_instance slp_node_instance, bool analyze_only)
1797 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1798 tree mask_element_type = NULL_TREE, mask_type;
1799 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1800 slp_tree node;
1801 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1802 gimple next_scalar_stmt;
1803 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1804 int first_mask_element;
1805 int index, unroll_factor, *mask, current_mask_element, ncopies;
1806 bool only_one_vec = false, need_next_vector = false;
1807 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1809 if (!targetm.vectorize.builtin_vec_perm)
1811 if (vect_print_dump_info (REPORT_DETAILS))
1813 fprintf (vect_dump, "no builtin for vect permute for ");
1814 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1817 return false;
1820 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1821 &mask_element_type);
1822 if (!builtin_decl || !mask_element_type)
1824 if (vect_print_dump_info (REPORT_DETAILS))
1826 fprintf (vect_dump, "no builtin for vect permute for ");
1827 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1830 return false;
1833 mask_type = get_vectype_for_scalar_type (mask_element_type);
1834 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1835 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1836 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1837 scale = mask_nunits / nunits;
1838 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1840 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1841 unrolling factor. */
1842 orig_vec_stmts_num = group_size *
1843 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1844 if (orig_vec_stmts_num == 1)
1845 only_one_vec = true;
1847 /* Number of copies is determined by the final vectorization factor
1848 relatively to SLP_NODE_INSTANCE unrolling factor. */
1849 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1851 /* Generate permutation masks for every NODE. Number of masks for each NODE
1852 is equal to GROUP_SIZE.
1853 E.g., we have a group of three nodes with three loads from the same
1854 location in each node, and the vector size is 4. I.e., we have a
1855 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1856 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1857 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1860 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1861 scpecific type, e.g., in bytes for Altivec.
1862 The last mask is illegal since we assume two operands for permute
1863 operation, and the mask element values can't be outside that range. Hence,
1864 the last mask must be converted into {2,5,5,5}.
1865 For the first two permutations we need the first and the second input
1866 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1867 we need the second and the third vectors: {b1,c1,a2,b2} and
1868 {c2,a3,b3,c3}. */
1870 for (i = 0;
1871 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1872 i, node);
1873 i++)
1875 scalar_index = 0;
1876 index = 0;
1877 vect_stmts_counter = 0;
1878 vec_index = 0;
1879 first_vec_index = vec_index++;
1880 if (only_one_vec)
1881 second_vec_index = first_vec_index;
1882 else
1883 second_vec_index = vec_index++;
1885 for (j = 0; j < unroll_factor; j++)
1887 for (k = 0; k < group_size; k++)
1889 first_mask_element = (i + j * group_size) * scale;
1890 for (m = 0; m < scale; m++)
1892 if (!vect_get_mask_element (stmt, first_mask_element, m,
1893 mask_nunits, only_one_vec, index, mask,
1894 &current_mask_element, &need_next_vector))
1895 return false;
1897 mask[index++] = current_mask_element;
1900 if (index == mask_nunits)
1902 tree mask_vec = NULL;
1904 while (--index >= 0)
1906 tree t = build_int_cst (mask_element_type, mask[index]);
1907 mask_vec = tree_cons (NULL, t, mask_vec);
1909 mask_vec = build_vector (mask_type, mask_vec);
1910 index = 0;
1912 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1913 mask_vec))
1915 if (vect_print_dump_info (REPORT_DETAILS))
1917 fprintf (vect_dump, "unsupported vect permute ");
1918 print_generic_expr (vect_dump, mask_vec, 0);
1920 free (mask);
1921 return false;
1924 if (!analyze_only)
1926 if (need_next_vector)
1928 first_vec_index = second_vec_index;
1929 second_vec_index = vec_index;
1932 next_scalar_stmt = VEC_index (gimple,
1933 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1935 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1936 mask_vec, first_vec_index, second_vec_index,
1937 gsi, node, builtin_decl, vectype, dr_chain,
1938 ncopies, vect_stmts_counter++);
1945 free (mask);
1946 return true;
/* Vectorize SLP instance tree in postorder.  Returns TRUE iff this node was
   a (strided) store group that has now been fully vectorized.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  /* Postorder: operands are vectorized before this node uses them.  */
  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree. Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  if (is_store)
    {
      if (DR_GROUP_FIRST_DR (stmt_info))
        /* If IS_STORE is TRUE, the vectorization of the
           interleaving chain was completed - free all the stores in
           the chain.  */
        vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
      else
        /* FORNOW: SLP originates only from strided stores.  */
        gcc_unreachable ();

      return true;
    }

  /* FORNOW: SLP originates only from strided stores.  */
  return false;
}
2046 bool
2047 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2049 VEC (slp_instance, heap) *slp_instances;
2050 slp_instance instance;
2051 unsigned int i, vf;
2052 bool is_store = false;
2054 if (loop_vinfo)
2056 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2057 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2059 else
2061 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2062 vf = 1;
2065 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2067 /* Schedule the tree of INSTANCE. */
2068 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2069 instance, vf);
2070 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2071 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2072 fprintf (vect_dump, "vectorizing stmts using SLP.");
2075 return is_store;
2079 /* Vectorize the basic block. */
2081 void
2082 vect_slp_transform_bb (basic_block bb)
2084 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2085 gimple_stmt_iterator si;
2087 gcc_assert (bb_vinfo);
2089 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "SLPing BB\n");
2092 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2094 gimple stmt = gsi_stmt (si);
2095 stmt_vec_info stmt_info;
2097 if (vect_print_dump_info (REPORT_DETAILS))
2099 fprintf (vect_dump, "------>SLPing statement: ");
2100 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2103 stmt_info = vinfo_for_stmt (stmt);
2104 gcc_assert (stmt_info);
2106 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2107 if (STMT_SLP_TYPE (stmt_info))
2109 vect_schedule_slp (NULL, bb_vinfo);
2110 break;
2114 mark_sym_for_renaming (gimple_vop (cfun));
2115 /* The memory tags and pointers in vectorized statements need to
2116 have their SSA forms updated. FIXME, why can't this be delayed
2117 until all the loops have been transformed? */
2118 update_ssa (TODO_update_ssa);
2120 if (vect_print_dump_info (REPORT_DETAILS))
2121 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2123 destroy_bb_vec_info (bb_vinfo);