/* Source: gcc/tree-vect-slp.c from the official GCC repository
   (snapshot containing the merge of r158704 through r158906).  */
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
41 /* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
44 LOC
45 find_bb_location (basic_block bb)
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
50 if (!bb)
51 return UNKNOWN_LOC;
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
60 return UNKNOWN_LOC;
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
66 static void
67 vect_free_slp_tree (slp_tree node)
69 if (!node)
70 return;
72 if (SLP_TREE_LEFT (node))
73 vect_free_slp_tree (SLP_TREE_LEFT (node));
75 if (SLP_TREE_RIGHT (node))
76 vect_free_slp_tree (SLP_TREE_RIGHT (node));
78 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
80 if (SLP_TREE_VEC_STMTS (node))
81 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
83 free (node);
87 /* Free the memory allocated for the SLP instance. */
89 void
90 vect_free_slp_instance (slp_instance instance)
92 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
98 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99 they are of a legal type and that they match the defs of the first stmt of
100 the SLP group (stored in FIRST_STMT_...). */
102 static bool
103 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104 slp_tree slp_node, gimple stmt,
105 VEC (gimple, heap) **def_stmts0,
106 VEC (gimple, heap) **def_stmts1,
107 enum vect_def_type *first_stmt_dt0,
108 enum vect_def_type *first_stmt_dt1,
109 tree *first_stmt_def0_type,
110 tree *first_stmt_def1_type,
111 tree *first_stmt_const_oprnd,
112 int ncopies_for_cost,
113 bool *pattern0, bool *pattern1)
115 tree oprnd;
116 unsigned int i, number_of_oprnds;
117 tree def;
118 gimple def_stmt;
119 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120 stmt_vec_info stmt_info =
121 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122 enum gimple_rhs_class rhs_class;
123 struct loop *loop = NULL;
125 if (loop_vinfo)
126 loop = LOOP_VINFO_LOOP (loop_vinfo);
128 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
131 for (i = 0; i < number_of_oprnds; i++)
133 oprnd = gimple_op (stmt, i + 1);
135 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
136 &dt[i])
137 || (!def_stmt && dt[i] != vect_constant_def))
139 if (vect_print_dump_info (REPORT_SLP))
141 fprintf (vect_dump, "Build SLP failed: can't find def for ");
142 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
145 return false;
148 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149 from the pattern. Check that all the stmts of the node are in the
150 pattern. */
151 if (loop && def_stmt && gimple_bb (def_stmt)
152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153 && vinfo_for_stmt (def_stmt)
154 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
156 if (!*first_stmt_dt0)
157 *pattern0 = true;
158 else
160 if (i == 1 && !*first_stmt_dt1)
161 *pattern1 = true;
162 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "Build SLP failed: some of the stmts"
167 " are in a pattern, and others are not ");
168 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
171 return false;
175 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178 if (*dt == vect_unknown_def_type)
180 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "Unsupported pattern.");
182 return false;
185 switch (gimple_code (def_stmt))
187 case GIMPLE_PHI:
188 def = gimple_phi_result (def_stmt);
189 break;
191 case GIMPLE_ASSIGN:
192 def = gimple_assign_lhs (def_stmt);
193 break;
195 default:
196 if (vect_print_dump_info (REPORT_DETAILS))
197 fprintf (vect_dump, "unsupported defining stmt: ");
198 return false;
202 if (!*first_stmt_dt0)
204 /* op0 of the first stmt of the group - store its info. */
205 *first_stmt_dt0 = dt[i];
206 if (def)
207 *first_stmt_def0_type = TREE_TYPE (def);
208 else
209 *first_stmt_const_oprnd = oprnd;
211 /* Analyze costs (for the first stmt of the group only). */
212 if (rhs_class != GIMPLE_SINGLE_RHS)
213 /* Not memory operation (we don't call this functions for loads). */
214 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215 else
216 /* Store. */
217 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
220 else
222 if (!*first_stmt_dt1 && i == 1)
224 /* op1 of the first stmt of the group - store its info. */
225 *first_stmt_dt1 = dt[i];
226 if (def)
227 *first_stmt_def1_type = TREE_TYPE (def);
228 else
230 /* We assume that the stmt contains only one constant
231 operand. We fail otherwise, to be on the safe side. */
232 if (*first_stmt_const_oprnd)
234 if (vect_print_dump_info (REPORT_SLP))
235 fprintf (vect_dump, "Build SLP failed: two constant "
236 "oprnds in stmt");
237 return false;
239 *first_stmt_const_oprnd = oprnd;
242 else
244 /* Not first stmt of the group, check that the def-stmt/s match
245 the def-stmt/s of the first stmt. */
246 if ((i == 0
247 && (*first_stmt_dt0 != dt[i]
248 || (*first_stmt_def0_type && def
249 && !types_compatible_p (*first_stmt_def0_type,
250 TREE_TYPE (def)))))
251 || (i == 1
252 && (*first_stmt_dt1 != dt[i]
253 || (*first_stmt_def1_type && def
254 && !types_compatible_p (*first_stmt_def1_type,
255 TREE_TYPE (def)))))
256 || (!def
257 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
258 TREE_TYPE (oprnd))))
260 if (vect_print_dump_info (REPORT_SLP))
261 fprintf (vect_dump, "Build SLP failed: different types ");
263 return false;
268 /* Check the types of the definitions. */
269 switch (dt[i])
271 case vect_constant_def:
272 case vect_external_def:
273 break;
275 case vect_internal_def:
276 case vect_reduction_def:
277 if (i == 0)
278 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
279 else
280 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
281 break;
283 default:
284 /* FORNOW: Not supported. */
285 if (vect_print_dump_info (REPORT_SLP))
287 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
288 print_generic_expr (vect_dump, def, TDF_SLIM);
291 return false;
295 return true;
299 /* Recursively build an SLP tree starting from NODE.
300 Fail (and return FALSE) if def-stmts are not isomorphic, require data
301 permutation or are of unsupported types of operation. Otherwise, return
302 TRUE. */
304 static bool
305 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
306 slp_tree *node, unsigned int group_size,
307 int *inside_cost, int *outside_cost,
308 int ncopies_for_cost, unsigned int *max_nunits,
309 VEC (int, heap) **load_permutation,
310 VEC (slp_tree, heap) **loads,
311 unsigned int vectorization_factor)
313 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
314 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
315 unsigned int i;
316 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
317 gimple stmt = VEC_index (gimple, stmts, 0);
318 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
319 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
320 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
321 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
322 tree lhs;
323 bool stop_recursion = false, need_same_oprnds = false;
324 tree vectype, scalar_type, first_op1 = NULL_TREE;
325 unsigned int ncopies;
326 optab optab;
327 int icode;
328 enum machine_mode optab_op2_mode;
329 enum machine_mode vec_mode;
330 tree first_stmt_const_oprnd = NULL_TREE;
331 struct data_reference *first_dr;
332 bool pattern0 = false, pattern1 = false;
333 HOST_WIDE_INT dummy;
334 bool permutation = false;
335 unsigned int load_place;
336 gimple first_load, prev_first_load = NULL;
338 /* For every stmt in NODE find its def stmt/s. */
339 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
341 if (vect_print_dump_info (REPORT_SLP))
343 fprintf (vect_dump, "Build SLP for ");
344 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
347 /* Fail to vectorize statements marked as unvectorizable. */
348 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
350 if (vect_print_dump_info (REPORT_SLP))
352 fprintf (vect_dump,
353 "Build SLP failed: unvectorizable statement ");
354 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
357 return false;
360 lhs = gimple_get_lhs (stmt);
361 if (lhs == NULL_TREE)
363 if (vect_print_dump_info (REPORT_SLP))
365 fprintf (vect_dump,
366 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
367 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
370 return false;
373 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
374 vectype = get_vectype_for_scalar_type (scalar_type);
375 if (!vectype)
377 if (vect_print_dump_info (REPORT_SLP))
379 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
380 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
382 return false;
385 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
386 if (ncopies != 1)
388 if (vect_print_dump_info (REPORT_SLP))
389 fprintf (vect_dump, "SLP with multiple types ");
391 /* FORNOW: multiple types are unsupported in BB SLP. */
392 if (bb_vinfo)
393 return false;
396 /* In case of multiple types we need to detect the smallest type. */
397 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
398 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
400 if (is_gimple_call (stmt))
401 rhs_code = CALL_EXPR;
402 else
403 rhs_code = gimple_assign_rhs_code (stmt);
405 /* Check the operation. */
406 if (i == 0)
408 first_stmt_code = rhs_code;
410 /* Shift arguments should be equal in all the packed stmts for a
411 vector shift with scalar shift operand. */
412 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
413 || rhs_code == LROTATE_EXPR
414 || rhs_code == RROTATE_EXPR)
416 vec_mode = TYPE_MODE (vectype);
418 /* First see if we have a vector/vector shift. */
419 optab = optab_for_tree_code (rhs_code, vectype,
420 optab_vector);
422 if (!optab
423 || (optab->handlers[(int) vec_mode].insn_code
424 == CODE_FOR_nothing))
426 /* No vector/vector shift, try for a vector/scalar shift. */
427 optab = optab_for_tree_code (rhs_code, vectype,
428 optab_scalar);
430 if (!optab)
432 if (vect_print_dump_info (REPORT_SLP))
433 fprintf (vect_dump, "Build SLP failed: no optab.");
434 return false;
436 icode = (int) optab->handlers[(int) vec_mode].insn_code;
437 if (icode == CODE_FOR_nothing)
439 if (vect_print_dump_info (REPORT_SLP))
440 fprintf (vect_dump, "Build SLP failed: "
441 "op not supported by target.");
442 return false;
444 optab_op2_mode = insn_data[icode].operand[2].mode;
445 if (!VECTOR_MODE_P (optab_op2_mode))
447 need_same_oprnds = true;
448 first_op1 = gimple_assign_rhs2 (stmt);
453 else
455 if (first_stmt_code != rhs_code
456 && (first_stmt_code != IMAGPART_EXPR
457 || rhs_code != REALPART_EXPR)
458 && (first_stmt_code != REALPART_EXPR
459 || rhs_code != IMAGPART_EXPR))
461 if (vect_print_dump_info (REPORT_SLP))
463 fprintf (vect_dump,
464 "Build SLP failed: different operation in stmt ");
465 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
468 return false;
471 if (need_same_oprnds
472 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
474 if (vect_print_dump_info (REPORT_SLP))
476 fprintf (vect_dump,
477 "Build SLP failed: different shift arguments in ");
478 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
481 return false;
485 /* Strided store or load. */
486 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
488 if (REFERENCE_CLASS_P (lhs))
490 /* Store. */
491 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
492 stmt, &def_stmts0, &def_stmts1,
493 &first_stmt_dt0,
494 &first_stmt_dt1,
495 &first_stmt_def0_type,
496 &first_stmt_def1_type,
497 &first_stmt_const_oprnd,
498 ncopies_for_cost,
499 &pattern0, &pattern1))
500 return false;
502 else
504 /* Load. */
505 /* FORNOW: Check that there is no gap between the loads. */
506 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
507 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
508 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
509 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
511 if (vect_print_dump_info (REPORT_SLP))
513 fprintf (vect_dump, "Build SLP failed: strided "
514 "loads have gaps ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
518 return false;
521 /* Check that the size of interleaved loads group is not
522 greater than the SLP group size. */
523 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
525 if (vect_print_dump_info (REPORT_SLP))
527 fprintf (vect_dump, "Build SLP failed: the number of "
528 "interleaved loads is greater than"
529 " the SLP group size ");
530 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
533 return false;
536 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
537 if (prev_first_load)
539 /* Check that there are no loads from different interleaving
540 chains in the same node. The only exception is complex
541 numbers. */
542 if (prev_first_load != first_load
543 && rhs_code != REALPART_EXPR
544 && rhs_code != IMAGPART_EXPR)
546 if (vect_print_dump_info (REPORT_SLP))
548 fprintf (vect_dump, "Build SLP failed: different "
549 "interleaving chains in one node ");
550 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
553 return false;
556 else
557 prev_first_load = first_load;
559 if (first_load == stmt)
561 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
562 if (vect_supportable_dr_alignment (first_dr)
563 == dr_unaligned_unsupported)
565 if (vect_print_dump_info (REPORT_SLP))
567 fprintf (vect_dump, "Build SLP failed: unsupported "
568 "unaligned load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 return false;
575 /* Analyze costs (for the first stmt in the group). */
576 vect_model_load_cost (vinfo_for_stmt (stmt),
577 ncopies_for_cost, *node);
580 /* Store the place of this load in the interleaving chain. In
581 case that permutation is needed we later decide if a specific
582 permutation is supported. */
583 load_place = vect_get_place_in_interleaving_chain (stmt,
584 first_load);
585 if (load_place != i)
586 permutation = true;
588 VEC_safe_push (int, heap, *load_permutation, load_place);
590 /* We stop the tree when we reach a group of loads. */
591 stop_recursion = true;
592 continue;
594 } /* Strided access. */
595 else
597 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
599 /* Not strided load. */
600 if (vect_print_dump_info (REPORT_SLP))
602 fprintf (vect_dump, "Build SLP failed: not strided load ");
603 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
606 /* FORNOW: Not strided loads are not supported. */
607 return false;
610 /* Not memory operation. */
611 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
612 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
614 if (vect_print_dump_info (REPORT_SLP))
616 fprintf (vect_dump, "Build SLP failed: operation");
617 fprintf (vect_dump, " unsupported ");
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 return false;
624 /* Find the def-stmts. */
625 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
626 &def_stmts0, &def_stmts1,
627 &first_stmt_dt0, &first_stmt_dt1,
628 &first_stmt_def0_type,
629 &first_stmt_def1_type,
630 &first_stmt_const_oprnd,
631 ncopies_for_cost,
632 &pattern0, &pattern1))
633 return false;
637 /* Add the costs of the node to the overall instance costs. */
638 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
639 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
641 /* Strided loads were reached - stop the recursion. */
642 if (stop_recursion)
644 if (permutation)
646 VEC_safe_push (slp_tree, heap, *loads, *node);
647 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
650 return true;
653 /* Create SLP_TREE nodes for the definition node/s. */
654 if (first_stmt_dt0 == vect_internal_def)
656 slp_tree left_node = XNEW (struct _slp_tree);
657 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
658 SLP_TREE_VEC_STMTS (left_node) = NULL;
659 SLP_TREE_LEFT (left_node) = NULL;
660 SLP_TREE_RIGHT (left_node) = NULL;
661 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
662 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
663 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
664 inside_cost, outside_cost, ncopies_for_cost,
665 max_nunits, load_permutation, loads,
666 vectorization_factor))
667 return false;
669 SLP_TREE_LEFT (*node) = left_node;
672 if (first_stmt_dt1 == vect_internal_def)
674 slp_tree right_node = XNEW (struct _slp_tree);
675 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
676 SLP_TREE_VEC_STMTS (right_node) = NULL;
677 SLP_TREE_LEFT (right_node) = NULL;
678 SLP_TREE_RIGHT (right_node) = NULL;
679 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
680 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
681 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
682 inside_cost, outside_cost, ncopies_for_cost,
683 max_nunits, load_permutation, loads,
684 vectorization_factor))
685 return false;
687 SLP_TREE_RIGHT (*node) = right_node;
690 return true;
694 static void
695 vect_print_slp_tree (slp_tree node)
697 int i;
698 gimple stmt;
700 if (!node)
701 return;
703 fprintf (vect_dump, "node ");
704 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
706 fprintf (vect_dump, "\n\tstmt %d ", i);
707 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
709 fprintf (vect_dump, "\n");
711 vect_print_slp_tree (SLP_TREE_LEFT (node));
712 vect_print_slp_tree (SLP_TREE_RIGHT (node));
716 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
717 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
718 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
719 stmts in NODE are to be marked. */
721 static void
722 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
724 int i;
725 gimple stmt;
727 if (!node)
728 return;
730 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
731 if (j < 0 || i == j)
732 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
734 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
735 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
739 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
741 static void
742 vect_mark_slp_stmts_relevant (slp_tree node)
744 int i;
745 gimple stmt;
746 stmt_vec_info stmt_info;
748 if (!node)
749 return;
751 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
753 stmt_info = vinfo_for_stmt (stmt);
754 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
755 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
756 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
759 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
760 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
764 /* Check if the permutation required by the SLP INSTANCE is supported.
765 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
767 static bool
768 vect_supported_slp_permutation_p (slp_instance instance)
770 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
771 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
772 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
773 VEC (slp_tree, heap) *sorted_loads = NULL;
774 int index;
775 slp_tree *tmp_loads = NULL;
776 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
777 slp_tree load;
779 /* FORNOW: The only supported loads permutation is loads from the same
780 location in all the loads in the node, when the data-refs in
781 nodes of LOADS constitute an interleaving chain.
782 Sort the nodes according to the order of accesses in the chain. */
783 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
784 for (i = 0, j = 0;
785 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
786 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
787 i += group_size, j++)
789 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
790 /* Check that the loads are all in the same interleaving chain. */
791 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
793 if (vect_print_dump_info (REPORT_DETAILS))
795 fprintf (vect_dump, "Build SLP failed: unsupported data "
796 "permutation ");
797 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
800 free (tmp_loads);
801 return false;
804 tmp_loads[index] = load;
807 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
808 for (i = 0; i < group_size; i++)
809 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
811 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
812 SLP_INSTANCE_LOADS (instance) = sorted_loads;
813 free (tmp_loads);
815 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
816 SLP_INSTANCE_UNROLLING_FACTOR (instance),
817 instance, true))
818 return false;
820 return true;
824 /* Rearrange the statements of NODE according to PERMUTATION. */
826 static void
827 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
828 VEC (int, heap) *permutation)
830 gimple stmt;
831 VEC (gimple, heap) *tmp_stmts;
832 unsigned int index, i;
834 if (!node)
835 return;
837 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
838 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
840 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
841 tmp_stmts = VEC_alloc (gimple, heap, group_size);
843 for (i = 0; i < group_size; i++)
844 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
846 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
848 index = VEC_index (int, permutation, i);
849 VEC_replace (gimple, tmp_stmts, index, stmt);
852 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
853 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
857 /* Check if the required load permutation is supported.
858 LOAD_PERMUTATION contains a list of indices of the loads.
859 In SLP this permutation is relative to the order of strided stores that are
860 the base of the SLP instance. */
862 static bool
863 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
864 VEC (int, heap) *load_permutation)
866 int i = 0, j, prev = -1, next, k, number_of_groups;
867 bool supported, bad_permutation = false;
868 sbitmap load_index;
869 slp_tree node;
870 gimple stmt;
872 /* FORNOW: permutations are only supported in SLP. */
873 if (!slp_instn)
874 return false;
876 if (vect_print_dump_info (REPORT_SLP))
878 fprintf (vect_dump, "Load permutation ");
879 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
880 fprintf (vect_dump, "%d ", next);
883 /* In case of reduction every load permutation is allowed, since the order
884 of the reduction statements is not important (as opposed to the case of
885 strided stores). The only condition we need to check is that all the
886 load nodes are of the same size and have the same permutation (and then
887 rearrange all the nodes of the SLP instance according to this
888 permutation). */
890 /* Check that all the load nodes are of the same size. */
891 for (i = 0;
892 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
893 i++)
894 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
895 != (unsigned) group_size)
896 return false;
898 node = SLP_INSTANCE_TREE (slp_instn);
899 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
900 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
901 instance, not all the loads belong to the same node or interleaving
902 group. Hence, we need to divide them into groups according to
903 GROUP_SIZE. */
904 number_of_groups = VEC_length (int, load_permutation) / group_size;
906 /* Reduction (there are no data-refs in the root). */
907 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
909 int first_group_load_index;
911 /* Compare all the permutation sequences to the first one. */
912 for (i = 1; i < number_of_groups; i++)
914 k = 0;
915 for (j = i * group_size; j < i * group_size + group_size; j++)
917 next = VEC_index (int, load_permutation, j);
918 first_group_load_index = VEC_index (int, load_permutation, k);
920 if (next != first_group_load_index)
922 bad_permutation = true;
923 break;
926 k++;
929 if (bad_permutation)
930 break;
933 if (!bad_permutation)
935 /* This permutaion is valid for reduction. Since the order of the
936 statements in the nodes is not important unless they are memory
937 accesses, we can rearrange the statements in all the nodes
938 according to the order of the loads. */
939 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
940 load_permutation);
941 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
942 return true;
946 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
947 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
948 well (unless it's reduction). */
949 if (VEC_length (int, load_permutation)
950 != (unsigned int) (group_size * group_size))
951 return false;
953 supported = true;
954 load_index = sbitmap_alloc (group_size);
955 sbitmap_zero (load_index);
956 for (j = 0; j < group_size; j++)
958 for (i = j * group_size, k = 0;
959 VEC_iterate (int, load_permutation, i, next) && k < group_size;
960 i++, k++)
962 if (i != j * group_size && next != prev)
964 supported = false;
965 break;
968 prev = next;
971 if (TEST_BIT (load_index, prev))
973 supported = false;
974 break;
977 SET_BIT (load_index, prev);
980 for (j = 0; j < group_size; j++)
981 if (!TEST_BIT (load_index, j))
982 return false;
984 sbitmap_free (load_index);
986 if (supported && i == group_size * group_size
987 && vect_supported_slp_permutation_p (slp_instn))
988 return true;
990 return false;
994 /* Find the first load in the loop that belongs to INSTANCE.
995 When loads are in several SLP nodes, there can be a case in which the first
996 load does not appear in the first SLP node to be transformed, causing
997 incorrect order of statements. Since we generate all the loads together,
998 they must be inserted before the first load of the SLP instance and not
999 before the first load of the first node of the instance. */
1000 static gimple
1001 vect_find_first_load_in_slp_instance (slp_instance instance)
1003 int i, j;
1004 slp_tree load_node;
1005 gimple first_load = NULL, load;
1007 for (i = 0;
1008 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
1009 i++)
1010 for (j = 0;
1011 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1012 j++)
1013 first_load = get_earlier_stmt (load, first_load);
1015 return first_load;
1019 /* Analyze an SLP instance starting from a group of strided stores. Call
1020 vect_build_slp_tree to build a tree of packed stmts if possible.
1021 Return FALSE if it's impossible to SLP any stmt in the loop. */
1023 static bool
1024 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1025 gimple stmt)
1027 slp_instance new_instance;
1028 slp_tree node = XNEW (struct _slp_tree);
1029 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1030 unsigned int unrolling_factor = 1, nunits;
1031 tree vectype, scalar_type = NULL_TREE;
1032 gimple next;
1033 unsigned int vectorization_factor = 0;
1034 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1035 unsigned int max_nunits = 0;
1036 VEC (int, heap) *load_permutation;
1037 VEC (slp_tree, heap) *loads;
1038 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1040 if (dr)
1042 scalar_type = TREE_TYPE (DR_REF (dr));
1043 vectype = get_vectype_for_scalar_type (scalar_type);
1044 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1046 else
1048 gcc_assert (loop_vinfo);
1049 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1050 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1053 if (!vectype)
1055 if (vect_print_dump_info (REPORT_SLP))
1057 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1058 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1061 return false;
1064 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1065 if (loop_vinfo)
1066 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1067 else
1068 /* No multitypes in BB SLP. */
1069 vectorization_factor = nunits;
1071 /* Calculate the unrolling factor. */
1072 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1073 if (unrolling_factor != 1 && !loop_vinfo)
1075 if (vect_print_dump_info (REPORT_SLP))
1076 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1077 " block SLP");
1079 return false;
1082 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1083 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1084 next = stmt;
1085 if (dr)
1087 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1088 while (next)
1090 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1091 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1094 else
1096 /* Collect reduction statements. */
1097 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1098 next);
1099 i++)
1101 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1102 if (vect_print_dump_info (REPORT_DETAILS))
1104 fprintf (vect_dump, "pushing reduction into node: ");
1105 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
1110 SLP_TREE_VEC_STMTS (node) = NULL;
1111 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1112 SLP_TREE_LEFT (node) = NULL;
1113 SLP_TREE_RIGHT (node) = NULL;
1114 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1115 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1117 /* Calculate the number of vector stmts to create based on the unrolling
1118 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1119 GROUP_SIZE / NUNITS otherwise. */
1120 ncopies_for_cost = unrolling_factor * group_size / nunits;
1122 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1123 loads = VEC_alloc (slp_tree, heap, group_size);
1125 /* Build the tree for the SLP instance. */
1126 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1127 &inside_cost, &outside_cost, ncopies_for_cost,
1128 &max_nunits, &load_permutation, &loads,
1129 vectorization_factor))
1131 /* Create a new SLP instance. */
1132 new_instance = XNEW (struct _slp_instance);
1133 SLP_INSTANCE_TREE (new_instance) = node;
1134 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1135 /* Calculate the unrolling factor based on the smallest type in the
1136 loop. */
1137 if (max_nunits > nunits)
1138 unrolling_factor = least_common_multiple (max_nunits, group_size)
1139 / group_size;
1141 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1142 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1143 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1144 SLP_INSTANCE_LOADS (new_instance) = loads;
1145 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1146 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1147 if (VEC_length (slp_tree, loads))
1149 if (!vect_supported_load_permutation_p (new_instance, group_size,
1150 load_permutation))
1152 if (vect_print_dump_info (REPORT_SLP))
1154 fprintf (vect_dump, "Build SLP failed: unsupported load "
1155 "permutation ");
1156 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1159 vect_free_slp_instance (new_instance);
1160 return false;
1163 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1164 = vect_find_first_load_in_slp_instance (new_instance);
1166 else
1167 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1169 if (loop_vinfo)
1170 VEC_safe_push (slp_instance, heap,
1171 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1172 new_instance);
1173 else
1174 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1175 new_instance);
1177 if (vect_print_dump_info (REPORT_SLP))
1178 vect_print_slp_tree (node);
1180 return true;
1183 /* Failed to SLP. */
1184 /* Free the allocated memory. */
1185 vect_free_slp_tree (node);
1186 VEC_free (int, heap, load_permutation);
1187 VEC_free (slp_tree, heap, loads);
1189 return false;
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.
   Exactly one of LOOP_VINFO / BB_VINFO is non-NULL and selects loop SLP
   vs. basic block SLP.  Return FALSE only when doing basic block SLP and
   no instance could be built; for loop SLP always return TRUE, since the
   loop can still be vectorized without SLP.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      /* In basic block SLP, failing to build any instance means the whole
         block cannot be vectorized.  */
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  Only the first
     reduction stmt is passed; vect_analyze_slp_instance collects the rest
     of the group itself.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  /* NOTE(review): OK is not consulted past this point for the loop case;
     loop SLP intentionally always reports success here.  */
  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  */
      /* The overall loop SLP unrolling factor is the maximum over all
         instances.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  /* A pure-SLP stmt whose SSA definition is used by a stmt outside the SLP
     region (a relevant non-SLP stmt, or a vectorizable cycle def) must also
     be loop-vectorized, i.e. it is hybrid.  Uses by a reduction PHI are
     exempt, since the reduction epilogue handles them separately.  */
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  /* Recurse into both operand subtrees.  */
  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
/* Find stmts that must be both vectorized and SLPed.  Walks every SLP
   instance tree of LOOP_VINFO and marks hybrid stmts via
   vect_detect_hybrid_slp_stmts.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  The new struct is also
   stored in BB->aux; destroy_bb_vec_info clears it again.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  /* xcalloc zero-initializes all fields not set below.  */
  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      /* Reset any stale uid before attaching fresh vectorizer info.  */
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  Also clears the BB->aux back-pointer set by
   new_bb_vec_info.  A NULL BB_VINFO is a no-op.

   NOTE(review): only the instance vector itself is freed here, not the
   slp_instance objects it holds — confirm callers release those (e.g. via
   vect_free_slp_instance) before getting here.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  A NULL NODE
   (absent child) is trivially supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  /* Postorder: the operand subtrees must be analyzable before the node
     itself.  */
  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      /* All stmts reaching here were marked pure-SLP in
         vect_slp_analyze_bb.  */
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  Unsupported instances are freed and removed
   from BB_VINFO's instance vector; FALSE is returned only when no instance
   survives.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  /* Note: I is only advanced when the instance is kept; after a removal the
     same index holds the next instance.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  Runs the full basic-block
   SLP analysis pipeline on BB: size check, data-ref analysis, dependence,
   alignment and access analysis, SLP tree construction, and operation
   support checks.  Returns a fully analyzed bb_vec_info on success, or
   NULL (with all partial state destroyed) on any failure.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count real instructions; debug stmts, nops and labels do not count
     towards the size limit.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* NULL loop_vinfo in all the analysis calls below selects the
     basic-block (as opposed to loop) analysis mode.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).  However,
   the actual number of vector stmts for every SLP node depends on VF which is
   set later in vect_analyze_operations().  Hence, SLP costs should be updated.
   In this function we assume that the inside costs calculated in
   vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  VF is always a multiple
       of the instance unrolling factor, so the division is exact.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      if (reduc_index == -1)
        {
          /* No reduction operand requested: nothing to build.  */
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      /* For reductions the operand to gather is fixed by REDUC_INDEX,
         overriding OP_NUM.  */
      op_num = reduc_index - 1;
      op = gimple_op (stmt, op_num + 1);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            /* Additive-style reductions: the neutral element is 0.  */
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
          case BIT_AND_EXPR:
            /* Multiplicative-style reductions: the neutral element is 1
               (all-ones for BIT_AND on a 1-bit type; for wider types see
               get_initial_def_for_reduction — TODO confirm).  */
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      /* A data ref here means a store; the operand is always the stored
         value (rhs1), regardless of OP_NUM.  */
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  /* CONSTANT_P decides between build_vector and a CONSTRUCTOR below.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Iterate backwards because the element list T is built by consing,
         which reverses the order.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              /* All but the last copy use the neutral element so the
                 initial value enters the reduction exactly once.  */
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              /* The current vector is full: materialize it and start a
                 fresh element list.  */
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          /* Replicate with the neutral element (reductions).  */
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          /* Otherwise replicate the first VEC_NUM vectors as-is.  */
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}
1797 /* Get vectorized definitions from SLP_NODE that contains corresponding
1798 vectorized def-stmts. */
1800 static void
1801 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1803 tree vec_oprnd;
1804 gimple vec_def_stmt;
1805 unsigned int i;
1807 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1809 for (i = 0;
1810 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1811 i++)
1813 gcc_assert (vec_def_stmt);
1814 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1815 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain scalar.
   REDUC_INDEX != -1 selects reduction handling: only the initial values for
   the first operand are produced.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  /* Only binary operations have a second operand to fetch.  */
  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}
1904 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1905 building a vector of type MASK_TYPE from it) and two input vectors placed in
1906 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1907 shifting by STRIDE elements of DR_CHAIN for every copy.
1908 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1909 copies).
1910 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1911 the created stmts must be inserted. */
1913 static inline void
1914 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1915 tree mask, int first_vec_indx, int second_vec_indx,
1916 gimple_stmt_iterator *gsi, slp_tree node,
1917 tree builtin_decl, tree vectype,
1918 VEC(tree,heap) *dr_chain,
1919 int ncopies, int vect_stmts_counter)
1921 tree perm_dest;
1922 gimple perm_stmt = NULL;
1923 stmt_vec_info next_stmt_info;
1924 int i, stride;
1925 tree first_vec, second_vec, data_ref;
1926 VEC (tree, heap) *params = NULL;
1928 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1930 /* Initialize the vect stmts of NODE to properly insert the generated
1931 stmts later. */
1932 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1933 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1934 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1936 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1937 for (i = 0; i < ncopies; i++)
1939 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1940 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1942 /* Build argument list for the vectorized call. */
1943 VEC_free (tree, heap, params);
1944 params = VEC_alloc (tree, heap, 3);
1945 VEC_quick_push (tree, params, first_vec);
1946 VEC_quick_push (tree, params, second_vec);
1947 VEC_quick_push (tree, params, mask);
1949 /* Generate the permute statement. */
1950 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1951 data_ref = make_ssa_name (perm_dest, perm_stmt);
1952 gimple_call_set_lhs (perm_stmt, data_ref);
1953 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1955 /* Store the vector statement in NODE. */
1956 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1957 stride * i + vect_stmts_counter, perm_stmt);
1959 first_vec_indx += stride;
1960 second_vec_indx += stride;
1963 /* Mark the scalar stmt as vectorized. */
1964 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1965 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   NOTE(review): state is kept across calls in function-local statics
   (NUMBER_OF_MASK_FIXES, MASK_FIXED, NEEDS_FIRST_VECTOR), reset only when
   INDEX reaches MASK_NUNITS - 1.  The function is therefore not reentrant:
   callers must feed mask elements of one mask strictly in order and to
   completion before starting another.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      /* Retroactively fix the elements already emitted for this mask.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
2051 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2052 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2053 permute statements for SLP_NODE_INSTANCE. */
2054 bool
2055 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2056 gimple_stmt_iterator *gsi, int vf,
2057 slp_instance slp_node_instance, bool analyze_only)
2059 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2060 tree mask_element_type = NULL_TREE, mask_type;
2061 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2062 slp_tree node;
2063 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2064 gimple next_scalar_stmt;
2065 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2066 int first_mask_element;
2067 int index, unroll_factor, *mask, current_mask_element, ncopies;
2068 bool only_one_vec = false, need_next_vector = false;
2069 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2071 if (!targetm.vectorize.builtin_vec_perm)
2073 if (vect_print_dump_info (REPORT_DETAILS))
2075 fprintf (vect_dump, "no builtin for vect permute for ");
2076 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2079 return false;
2082 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2083 &mask_element_type);
2084 if (!builtin_decl || !mask_element_type)
2086 if (vect_print_dump_info (REPORT_DETAILS))
2088 fprintf (vect_dump, "no builtin for vect permute for ");
2089 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2092 return false;
2095 mask_type = get_vectype_for_scalar_type (mask_element_type);
2096 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2097 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2098 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2099 scale = mask_nunits / nunits;
2100 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2102 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2103 unrolling factor. */
2104 orig_vec_stmts_num = group_size *
2105 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2106 if (orig_vec_stmts_num == 1)
2107 only_one_vec = true;
2109 /* Number of copies is determined by the final vectorization factor
2110 relatively to SLP_NODE_INSTANCE unrolling factor. */
2111 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2113 /* Generate permutation masks for every NODE. Number of masks for each NODE
2114 is equal to GROUP_SIZE.
2115 E.g., we have a group of three nodes with three loads from the same
2116 location in each node, and the vector size is 4. I.e., we have a
2117 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2118 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2119 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2122 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2123 scpecific type, e.g., in bytes for Altivec.
2124 The last mask is illegal since we assume two operands for permute
2125 operation, and the mask element values can't be outside that range. Hence,
2126 the last mask must be converted into {2,5,5,5}.
2127 For the first two permutations we need the first and the second input
2128 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2129 we need the second and the third vectors: {b1,c1,a2,b2} and
2130 {c2,a3,b3,c3}. */
2132 for (i = 0;
2133 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2134 i, node);
2135 i++)
2137 scalar_index = 0;
2138 index = 0;
2139 vect_stmts_counter = 0;
2140 vec_index = 0;
2141 first_vec_index = vec_index++;
2142 if (only_one_vec)
2143 second_vec_index = first_vec_index;
2144 else
2145 second_vec_index = vec_index++;
2147 for (j = 0; j < unroll_factor; j++)
2149 for (k = 0; k < group_size; k++)
2151 first_mask_element = (i + j * group_size) * scale;
2152 for (m = 0; m < scale; m++)
2154 if (!vect_get_mask_element (stmt, first_mask_element, m,
2155 mask_nunits, only_one_vec, index, mask,
2156 &current_mask_element, &need_next_vector))
2157 return false;
2159 mask[index++] = current_mask_element;
2162 if (index == mask_nunits)
2164 tree mask_vec = NULL;
2166 while (--index >= 0)
2168 tree t = build_int_cst (mask_element_type, mask[index]);
2169 mask_vec = tree_cons (NULL, t, mask_vec);
2171 mask_vec = build_vector (mask_type, mask_vec);
2172 index = 0;
2174 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2175 mask_vec))
2177 if (vect_print_dump_info (REPORT_DETAILS))
2179 fprintf (vect_dump, "unsupported vect permute ");
2180 print_generic_expr (vect_dump, mask_vec, 0);
2182 free (mask);
2183 return false;
2186 if (!analyze_only)
2188 if (need_next_vector)
2190 first_vec_index = second_vec_index;
2191 second_vec_index = vec_index;
2194 next_scalar_stmt = VEC_index (gimple,
2195 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2197 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2198 mask_vec, first_vec_index, second_vec_index,
2199 gsi, node, builtin_decl, vectype, dr_chain,
2200 ncopies, vect_stmts_counter++);
2207 free (mask);
2208 return true;
2213 /* Vectorize SLP instance tree in postorder. */
/* Emit vector statements for the SLP subtree rooted at NODE, which belongs
   to SLP instance INSTANCE.  VECTORIZATION_FACTOR is the vectorization
   factor used to size the per-node vectors of generated statements.
   Children are transformed before NODE itself (postorder), so vectorized
   operand definitions exist before their uses.  Returns true iff the
   statement transformed for NODE was a store.  */
2215 static bool
2216 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2217 unsigned int vectorization_factor)
2219 gimple stmt;
2220 bool strided_store, is_store;
2221 gimple_stmt_iterator si;
2222 stmt_vec_info stmt_info;
2223 unsigned int vec_stmts_size, nunits, group_size;
2224 tree vectype;
2225 int i;
2226 slp_tree loads_node;
/* A missing child is not an error (leaves have no subtrees); it is simply
   "not a store".  */
2228 if (!node)
2229 return false;
/* Transform both subtrees first.  Their boolean results are deliberately
   ignored: only whether THIS node's statement is a store matters to the
   caller.  */
2231 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2232 vectorization_factor);
2233 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2234 vectorization_factor);
/* Use the node's first scalar statement as the representative; its
   vector type is taken as the type of the whole node below.  */
2236 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2237 stmt_info = vinfo_for_stmt (stmt);
2239 /* VECTYPE is the type of the destination. */
2240 vectype = STMT_VINFO_VECTYPE (stmt_info);
2241 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2242 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2244 /* For each SLP instance calculate number of vector stmts to be created
2245 for the scalar stmts in each node of the SLP tree. Number of vector
2246 elements in one vector iteration is the number of scalar elements in
2247 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2248 size. */
2249 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2251 /* In case of load permutation we have to allocate vectorized statements for
2252 all the nodes that participate in that permutation. */
2253 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2255 for (i = 0;
2256 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2257 i++)
/* Allocate only once -- a loads node may be visited for several tree
   nodes of the same instance.  */
2259 if (!SLP_TREE_VEC_STMTS (loads_node))
2261 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2262 vec_stmts_size)
2263 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
/* Likewise allocate the vector-statement vector of NODE itself.  */
2268 if (!SLP_TREE_VEC_STMTS (node))
2270 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2271 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2274 if (vect_print_dump_info (REPORT_DETAILS))
2276 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2277 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2280 /* Loads should be inserted before the first load. */
/* For a strided (interleaved) load group, every generated vector load is
   emitted at the group's first scalar load so the later permutation can
   consume them together.  Statements whose LHS is a memory reference are
   stores and keep their own insertion point.  */
2281 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2282 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2283 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2284 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2285 else
2286 si = gsi_for_stmt (stmt);
/* Do the actual transformation of the representative statement; it fills
   SLP_TREE_VEC_STMTS (node) for all copies.  */
2288 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2289 return is_store;
/* Generate vector code for all SLP instances of the loop described by
   LOOP_VINFO or, when LOOP_VINFO is NULL, of the basic block described by
   BB_VINFO.  After scheduling, the now-redundant scalar store statements
   of each instance are removed.  Returns the IS_STORE result of the last
   instance scheduled.  */
2293 bool
2294 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2296 VEC (slp_instance, heap) *slp_instances;
2297 slp_instance instance;
2298 unsigned int i, vf;
2299 bool is_store = false;
/* Pick the instance list and vectorization factor from whichever context
   we were given; basic-block SLP always uses a factor of 1.  */
2301 if (loop_vinfo)
2303 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2304 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2306 else
2308 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2309 vf = 1;
2312 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2314 /* Schedule the tree of INSTANCE. */
2315 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2316 instance, vf);
2317 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2318 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2319 fprintf (vect_dump, "vectorizing stmts using SLP.");
/* Second pass: the scalar stores at the root of each instance have been
   replaced by vector stores, so delete them (at most GROUP_SIZE per
   instance).  */
2322 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2324 slp_tree root = SLP_INSTANCE_TREE (instance);
2325 gimple store;
2326 unsigned int j;
2327 gimple_stmt_iterator gsi;
2329 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2330 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
/* Stop at the first scalar stmt without a data reference -- only
   memory-accessing root stmts (stores) are removed.  */
2332 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2333 break;
2335 /* Free the attached stmt_vec_info and remove the stmt. */
2336 gsi = gsi_for_stmt (store);
2337 gsi_remove (&gsi, true);
2338 free_stmt_vec_info (store);
2342 return is_store;
2346 /* Vectorize the basic block. */
2348 void
2349 vect_slp_transform_bb (basic_block bb)
2351 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2352 gimple_stmt_iterator si;
2354 gcc_assert (bb_vinfo);
2356 if (vect_print_dump_info (REPORT_DETAILS))
2357 fprintf (vect_dump, "SLPing BB\n");
2359 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2361 gimple stmt = gsi_stmt (si);
2362 stmt_vec_info stmt_info;
2364 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "------>SLPing statement: ");
2367 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2370 stmt_info = vinfo_for_stmt (stmt);
2371 gcc_assert (stmt_info);
2373 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2374 if (STMT_SLP_TYPE (stmt_info))
2376 vect_schedule_slp (NULL, bb_vinfo);
2377 break;
2381 mark_sym_for_renaming (gimple_vop (cfun));
2382 /* The memory tags and pointers in vectorized statements need to
2383 have their SSA forms updated. FIXME, why can't this be delayed
2384 until all the loops have been transformed? */
2385 update_ssa (TODO_update_ssa);
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2390 destroy_bb_vec_info (bb_vinfo);