* gcc.target/powerpc/altivec-volatile.c: Adjust expected warning.
[official-gcc.git] / gcc / tree-vect-slp.c
blob6b377a8842a3541aea4565b5bc8f116c90044c49
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
45 LOC
46 find_bb_location (basic_block bb)
48 gimple stmt = NULL;
49 gimple_stmt_iterator si;
51 if (!bb)
52 return UNKNOWN_LOC;
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 stmt = gsi_stmt (si);
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
61 return UNKNOWN_LOC;
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 static void
68 vect_free_slp_tree (slp_tree node)
70 if (!node)
71 return;
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
84 free (node);
88 /* Free the memory allocated for the SLP instance. */
90 void
91 vect_free_slp_instance (slp_instance instance)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
103 static bool
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
116 tree oprnd;
117 unsigned int i, number_of_oprnds;
118 tree def;
119 gimple def_stmt;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
126 if (loop_vinfo)
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 &dt[i])
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
146 return false;
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
151 pattern. */
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
157 if (!*first_stmt_dt0)
158 *pattern0 = true;
159 else
161 if (i == 1 && !*first_stmt_dt1)
162 *pattern1 = true;
163 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
165 if (vect_print_dump_info (REPORT_DETAILS))
167 fprintf (vect_dump, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
172 return false;
176 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
177 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
179 if (*dt == vect_unknown_def_type)
181 if (vect_print_dump_info (REPORT_DETAILS))
182 fprintf (vect_dump, "Unsupported pattern.");
183 return false;
186 switch (gimple_code (def_stmt))
188 case GIMPLE_PHI:
189 def = gimple_phi_result (def_stmt);
190 break;
192 case GIMPLE_ASSIGN:
193 def = gimple_assign_lhs (def_stmt);
194 break;
196 default:
197 if (vect_print_dump_info (REPORT_DETAILS))
198 fprintf (vect_dump, "unsupported defining stmt: ");
199 return false;
203 if (!*first_stmt_dt0)
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0 = dt[i];
207 if (def)
208 *first_stmt_def0_type = TREE_TYPE (def);
209 else
210 *first_stmt_const_oprnd = oprnd;
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class != GIMPLE_SINGLE_RHS)
214 /* Not memory operation (we don't call this functions for loads). */
215 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
216 else
217 /* Store. */
218 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
221 else
223 if (!*first_stmt_dt1 && i == 1)
225 /* op1 of the first stmt of the group - store its info. */
226 *first_stmt_dt1 = dt[i];
227 if (def)
228 *first_stmt_def1_type = TREE_TYPE (def);
229 else
231 /* We assume that the stmt contains only one constant
232 operand. We fail otherwise, to be on the safe side. */
233 if (*first_stmt_const_oprnd)
235 if (vect_print_dump_info (REPORT_SLP))
236 fprintf (vect_dump, "Build SLP failed: two constant "
237 "oprnds in stmt");
238 return false;
240 *first_stmt_const_oprnd = oprnd;
243 else
245 /* Not first stmt of the group, check that the def-stmt/s match
246 the def-stmt/s of the first stmt. */
247 if ((i == 0
248 && (*first_stmt_dt0 != dt[i]
249 || (*first_stmt_def0_type && def
250 && !types_compatible_p (*first_stmt_def0_type,
251 TREE_TYPE (def)))))
252 || (i == 1
253 && (*first_stmt_dt1 != dt[i]
254 || (*first_stmt_def1_type && def
255 && !types_compatible_p (*first_stmt_def1_type,
256 TREE_TYPE (def)))))
257 || (!def
258 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
259 TREE_TYPE (oprnd))))
261 if (vect_print_dump_info (REPORT_SLP))
262 fprintf (vect_dump, "Build SLP failed: different types ");
264 return false;
269 /* Check the types of the definitions. */
270 switch (dt[i])
272 case vect_constant_def:
273 case vect_external_def:
274 break;
276 case vect_internal_def:
277 case vect_reduction_def:
278 if (i == 0)
279 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
280 else
281 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
282 break;
284 default:
285 /* FORNOW: Not supported. */
286 if (vect_print_dump_info (REPORT_SLP))
288 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
289 print_generic_expr (vect_dump, def, TDF_SLIM);
292 return false;
296 return true;
300 /* Recursively build an SLP tree starting from NODE.
301 Fail (and return FALSE) if def-stmts are not isomorphic, require data
302 permutation or are of unsupported types of operation. Otherwise, return
303 TRUE. */
305 static bool
306 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
307 slp_tree *node, unsigned int group_size,
308 int *inside_cost, int *outside_cost,
309 int ncopies_for_cost, unsigned int *max_nunits,
310 VEC (int, heap) **load_permutation,
311 VEC (slp_tree, heap) **loads,
312 unsigned int vectorization_factor)
314 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
316 unsigned int i;
317 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
318 gimple stmt = VEC_index (gimple, stmts, 0);
319 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
320 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
321 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
322 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
323 tree lhs;
324 bool stop_recursion = false, need_same_oprnds = false;
325 tree vectype, scalar_type, first_op1 = NULL_TREE;
326 unsigned int ncopies;
327 optab optab;
328 int icode;
329 enum machine_mode optab_op2_mode;
330 enum machine_mode vec_mode;
331 tree first_stmt_const_oprnd = NULL_TREE;
332 struct data_reference *first_dr;
333 bool pattern0 = false, pattern1 = false;
334 HOST_WIDE_INT dummy;
335 bool permutation = false;
336 unsigned int load_place;
337 gimple first_load, prev_first_load = NULL;
339 /* For every stmt in NODE find its def stmt/s. */
340 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
342 if (vect_print_dump_info (REPORT_SLP))
344 fprintf (vect_dump, "Build SLP for ");
345 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
348 /* Fail to vectorize statements marked as unvectorizable. */
349 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
351 if (vect_print_dump_info (REPORT_SLP))
353 fprintf (vect_dump,
354 "Build SLP failed: unvectorizable statement ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
358 return false;
361 lhs = gimple_get_lhs (stmt);
362 if (lhs == NULL_TREE)
364 if (vect_print_dump_info (REPORT_SLP))
366 fprintf (vect_dump,
367 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
368 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
371 return false;
374 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
375 vectype = get_vectype_for_scalar_type (scalar_type);
376 if (!vectype)
378 if (vect_print_dump_info (REPORT_SLP))
380 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
381 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
383 return false;
386 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
387 if (ncopies != 1)
389 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "SLP with multiple types ");
392 /* FORNOW: multiple types are unsupported in BB SLP. */
393 if (bb_vinfo)
394 return false;
397 /* In case of multiple types we need to detect the smallest type. */
398 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
399 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
401 if (is_gimple_call (stmt))
402 rhs_code = CALL_EXPR;
403 else
404 rhs_code = gimple_assign_rhs_code (stmt);
406 /* Check the operation. */
407 if (i == 0)
409 first_stmt_code = rhs_code;
411 /* Shift arguments should be equal in all the packed stmts for a
412 vector shift with scalar shift operand. */
413 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
414 || rhs_code == LROTATE_EXPR
415 || rhs_code == RROTATE_EXPR)
417 vec_mode = TYPE_MODE (vectype);
419 /* First see if we have a vector/vector shift. */
420 optab = optab_for_tree_code (rhs_code, vectype,
421 optab_vector);
423 if (!optab
424 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
426 /* No vector/vector shift, try for a vector/scalar shift. */
427 optab = optab_for_tree_code (rhs_code, vectype,
428 optab_scalar);
430 if (!optab)
432 if (vect_print_dump_info (REPORT_SLP))
433 fprintf (vect_dump, "Build SLP failed: no optab.");
434 return false;
436 icode = (int) optab_handler (optab, vec_mode);
437 if (icode == CODE_FOR_nothing)
439 if (vect_print_dump_info (REPORT_SLP))
440 fprintf (vect_dump, "Build SLP failed: "
441 "op not supported by target.");
442 return false;
444 optab_op2_mode = insn_data[icode].operand[2].mode;
445 if (!VECTOR_MODE_P (optab_op2_mode))
447 need_same_oprnds = true;
448 first_op1 = gimple_assign_rhs2 (stmt);
453 else
455 if (first_stmt_code != rhs_code
456 && (first_stmt_code != IMAGPART_EXPR
457 || rhs_code != REALPART_EXPR)
458 && (first_stmt_code != REALPART_EXPR
459 || rhs_code != IMAGPART_EXPR))
461 if (vect_print_dump_info (REPORT_SLP))
463 fprintf (vect_dump,
464 "Build SLP failed: different operation in stmt ");
465 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
468 return false;
471 if (need_same_oprnds
472 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
474 if (vect_print_dump_info (REPORT_SLP))
476 fprintf (vect_dump,
477 "Build SLP failed: different shift arguments in ");
478 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
481 return false;
485 /* Strided store or load. */
486 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
488 if (REFERENCE_CLASS_P (lhs))
490 /* Store. */
491 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
492 stmt, &def_stmts0, &def_stmts1,
493 &first_stmt_dt0,
494 &first_stmt_dt1,
495 &first_stmt_def0_type,
496 &first_stmt_def1_type,
497 &first_stmt_const_oprnd,
498 ncopies_for_cost,
499 &pattern0, &pattern1))
500 return false;
502 else
504 /* Load. */
505 /* FORNOW: Check that there is no gap between the loads. */
506 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
507 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
508 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
509 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
511 if (vect_print_dump_info (REPORT_SLP))
513 fprintf (vect_dump, "Build SLP failed: strided "
514 "loads have gaps ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
518 return false;
521 /* Check that the size of interleaved loads group is not
522 greater than the SLP group size. */
523 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
525 if (vect_print_dump_info (REPORT_SLP))
527 fprintf (vect_dump, "Build SLP failed: the number of "
528 "interleaved loads is greater than"
529 " the SLP group size ");
530 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
533 return false;
536 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
537 if (prev_first_load)
539 /* Check that there are no loads from different interleaving
540 chains in the same node. The only exception is complex
541 numbers. */
542 if (prev_first_load != first_load
543 && rhs_code != REALPART_EXPR
544 && rhs_code != IMAGPART_EXPR)
546 if (vect_print_dump_info (REPORT_SLP))
548 fprintf (vect_dump, "Build SLP failed: different "
549 "interleaving chains in one node ");
550 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
553 return false;
556 else
557 prev_first_load = first_load;
559 if (first_load == stmt)
561 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
562 if (vect_supportable_dr_alignment (first_dr, false)
563 == dr_unaligned_unsupported)
565 if (vect_print_dump_info (REPORT_SLP))
567 fprintf (vect_dump, "Build SLP failed: unsupported "
568 "unaligned load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 return false;
575 /* Analyze costs (for the first stmt in the group). */
576 vect_model_load_cost (vinfo_for_stmt (stmt),
577 ncopies_for_cost, *node);
580 /* Store the place of this load in the interleaving chain. In
581 case that permutation is needed we later decide if a specific
582 permutation is supported. */
583 load_place = vect_get_place_in_interleaving_chain (stmt,
584 first_load);
585 if (load_place != i)
586 permutation = true;
588 VEC_safe_push (int, heap, *load_permutation, load_place);
590 /* We stop the tree when we reach a group of loads. */
591 stop_recursion = true;
592 continue;
594 } /* Strided access. */
595 else
597 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
599 /* Not strided load. */
600 if (vect_print_dump_info (REPORT_SLP))
602 fprintf (vect_dump, "Build SLP failed: not strided load ");
603 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
606 /* FORNOW: Not strided loads are not supported. */
607 return false;
610 /* Not memory operation. */
611 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
612 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
614 if (vect_print_dump_info (REPORT_SLP))
616 fprintf (vect_dump, "Build SLP failed: operation");
617 fprintf (vect_dump, " unsupported ");
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 return false;
624 /* Find the def-stmts. */
625 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
626 &def_stmts0, &def_stmts1,
627 &first_stmt_dt0, &first_stmt_dt1,
628 &first_stmt_def0_type,
629 &first_stmt_def1_type,
630 &first_stmt_const_oprnd,
631 ncopies_for_cost,
632 &pattern0, &pattern1))
633 return false;
637 /* Add the costs of the node to the overall instance costs. */
638 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
639 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
641 /* Strided loads were reached - stop the recursion. */
642 if (stop_recursion)
644 if (permutation)
646 VEC_safe_push (slp_tree, heap, *loads, *node);
647 *inside_cost
648 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
649 * group_size;
652 return true;
655 /* Create SLP_TREE nodes for the definition node/s. */
656 if (first_stmt_dt0 == vect_internal_def)
658 slp_tree left_node = XNEW (struct _slp_tree);
659 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
660 SLP_TREE_VEC_STMTS (left_node) = NULL;
661 SLP_TREE_LEFT (left_node) = NULL;
662 SLP_TREE_RIGHT (left_node) = NULL;
663 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
664 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
665 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
666 inside_cost, outside_cost, ncopies_for_cost,
667 max_nunits, load_permutation, loads,
668 vectorization_factor))
669 return false;
671 SLP_TREE_LEFT (*node) = left_node;
674 if (first_stmt_dt1 == vect_internal_def)
676 slp_tree right_node = XNEW (struct _slp_tree);
677 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
678 SLP_TREE_VEC_STMTS (right_node) = NULL;
679 SLP_TREE_LEFT (right_node) = NULL;
680 SLP_TREE_RIGHT (right_node) = NULL;
681 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
682 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
683 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
684 inside_cost, outside_cost, ncopies_for_cost,
685 max_nunits, load_permutation, loads,
686 vectorization_factor))
687 return false;
689 SLP_TREE_RIGHT (*node) = right_node;
692 return true;
696 static void
697 vect_print_slp_tree (slp_tree node)
699 int i;
700 gimple stmt;
702 if (!node)
703 return;
705 fprintf (vect_dump, "node ");
706 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
708 fprintf (vect_dump, "\n\tstmt %d ", i);
709 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
711 fprintf (vect_dump, "\n");
713 vect_print_slp_tree (SLP_TREE_LEFT (node));
714 vect_print_slp_tree (SLP_TREE_RIGHT (node));
718 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
719 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
720 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
721 stmts in NODE are to be marked. */
723 static void
724 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
726 int i;
727 gimple stmt;
729 if (!node)
730 return;
732 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
733 if (j < 0 || i == j)
734 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
736 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
737 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
741 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
743 static void
744 vect_mark_slp_stmts_relevant (slp_tree node)
746 int i;
747 gimple stmt;
748 stmt_vec_info stmt_info;
750 if (!node)
751 return;
753 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
755 stmt_info = vinfo_for_stmt (stmt);
756 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
757 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
758 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
761 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
762 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
766 /* Check if the permutation required by the SLP INSTANCE is supported.
767 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
769 static bool
770 vect_supported_slp_permutation_p (slp_instance instance)
772 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
773 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
774 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
775 VEC (slp_tree, heap) *sorted_loads = NULL;
776 int index;
777 slp_tree *tmp_loads = NULL;
778 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
779 slp_tree load;
781 /* FORNOW: The only supported loads permutation is loads from the same
782 location in all the loads in the node, when the data-refs in
783 nodes of LOADS constitute an interleaving chain.
784 Sort the nodes according to the order of accesses in the chain. */
785 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
786 for (i = 0, j = 0;
787 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
788 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
789 i += group_size, j++)
791 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
792 /* Check that the loads are all in the same interleaving chain. */
793 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
795 if (vect_print_dump_info (REPORT_DETAILS))
797 fprintf (vect_dump, "Build SLP failed: unsupported data "
798 "permutation ");
799 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
802 free (tmp_loads);
803 return false;
806 tmp_loads[index] = load;
809 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
810 for (i = 0; i < group_size; i++)
811 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
813 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
814 SLP_INSTANCE_LOADS (instance) = sorted_loads;
815 free (tmp_loads);
817 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
818 SLP_INSTANCE_UNROLLING_FACTOR (instance),
819 instance, true))
820 return false;
822 return true;
826 /* Rearrange the statements of NODE according to PERMUTATION. */
828 static void
829 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
830 VEC (int, heap) *permutation)
832 gimple stmt;
833 VEC (gimple, heap) *tmp_stmts;
834 unsigned int index, i;
836 if (!node)
837 return;
839 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
840 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
842 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
843 tmp_stmts = VEC_alloc (gimple, heap, group_size);
845 for (i = 0; i < group_size; i++)
846 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
848 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
850 index = VEC_index (int, permutation, i);
851 VEC_replace (gimple, tmp_stmts, index, stmt);
854 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
855 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
859 /* Check if the required load permutation is supported.
860 LOAD_PERMUTATION contains a list of indices of the loads.
861 In SLP this permutation is relative to the order of strided stores that are
862 the base of the SLP instance. */
864 static bool
865 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
866 VEC (int, heap) *load_permutation)
868 int i = 0, j, prev = -1, next, k, number_of_groups;
869 bool supported, bad_permutation = false;
870 sbitmap load_index;
871 slp_tree node;
872 gimple stmt;
874 /* FORNOW: permutations are only supported in SLP. */
875 if (!slp_instn)
876 return false;
878 if (vect_print_dump_info (REPORT_SLP))
880 fprintf (vect_dump, "Load permutation ");
881 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
882 fprintf (vect_dump, "%d ", next);
885 /* In case of reduction every load permutation is allowed, since the order
886 of the reduction statements is not important (as opposed to the case of
887 strided stores). The only condition we need to check is that all the
888 load nodes are of the same size and have the same permutation (and then
889 rearrange all the nodes of the SLP instance according to this
890 permutation). */
892 /* Check that all the load nodes are of the same size. */
893 for (i = 0;
894 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
895 i++)
896 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
897 != (unsigned) group_size)
898 return false;
900 node = SLP_INSTANCE_TREE (slp_instn);
901 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
902 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
903 instance, not all the loads belong to the same node or interleaving
904 group. Hence, we need to divide them into groups according to
905 GROUP_SIZE. */
906 number_of_groups = VEC_length (int, load_permutation) / group_size;
908 /* Reduction (there are no data-refs in the root). */
909 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
911 int first_group_load_index;
913 /* Compare all the permutation sequences to the first one. */
914 for (i = 1; i < number_of_groups; i++)
916 k = 0;
917 for (j = i * group_size; j < i * group_size + group_size; j++)
919 next = VEC_index (int, load_permutation, j);
920 first_group_load_index = VEC_index (int, load_permutation, k);
922 if (next != first_group_load_index)
924 bad_permutation = true;
925 break;
928 k++;
931 if (bad_permutation)
932 break;
935 if (!bad_permutation)
937 /* This permutaion is valid for reduction. Since the order of the
938 statements in the nodes is not important unless they are memory
939 accesses, we can rearrange the statements in all the nodes
940 according to the order of the loads. */
941 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
942 load_permutation);
943 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
944 return true;
948 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
949 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
950 well (unless it's reduction). */
951 if (VEC_length (int, load_permutation)
952 != (unsigned int) (group_size * group_size))
953 return false;
955 supported = true;
956 load_index = sbitmap_alloc (group_size);
957 sbitmap_zero (load_index);
958 for (j = 0; j < group_size; j++)
960 for (i = j * group_size, k = 0;
961 VEC_iterate (int, load_permutation, i, next) && k < group_size;
962 i++, k++)
964 if (i != j * group_size && next != prev)
966 supported = false;
967 break;
970 prev = next;
973 if (TEST_BIT (load_index, prev))
975 supported = false;
976 break;
979 SET_BIT (load_index, prev);
982 for (j = 0; j < group_size; j++)
983 if (!TEST_BIT (load_index, j))
984 return false;
986 sbitmap_free (load_index);
988 if (supported && i == group_size * group_size
989 && vect_supported_slp_permutation_p (slp_instn))
990 return true;
992 return false;
996 /* Find the first load in the loop that belongs to INSTANCE.
997 When loads are in several SLP nodes, there can be a case in which the first
998 load does not appear in the first SLP node to be transformed, causing
999 incorrect order of statements. Since we generate all the loads together,
1000 they must be inserted before the first load of the SLP instance and not
1001 before the first load of the first node of the instance. */
1002 static gimple
1003 vect_find_first_load_in_slp_instance (slp_instance instance)
1005 int i, j;
1006 slp_tree load_node;
1007 gimple first_load = NULL, load;
1009 for (i = 0;
1010 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
1011 i++)
1012 for (j = 0;
1013 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1014 j++)
1015 first_load = get_earlier_stmt (load, first_load);
1017 return first_load;
1021 /* Analyze an SLP instance starting from a group of strided stores. Call
1022 vect_build_slp_tree to build a tree of packed stmts if possible.
1023 Return FALSE if it's impossible to SLP any stmt in the loop. */
1025 static bool
1026 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1027 gimple stmt)
1029 slp_instance new_instance;
1030 slp_tree node = XNEW (struct _slp_tree);
1031 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1032 unsigned int unrolling_factor = 1, nunits;
1033 tree vectype, scalar_type = NULL_TREE;
1034 gimple next;
1035 unsigned int vectorization_factor = 0;
1036 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1037 unsigned int max_nunits = 0;
1038 VEC (int, heap) *load_permutation;
1039 VEC (slp_tree, heap) *loads;
1040 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1042 if (dr)
1044 scalar_type = TREE_TYPE (DR_REF (dr));
1045 vectype = get_vectype_for_scalar_type (scalar_type);
1046 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1048 else
1050 gcc_assert (loop_vinfo);
1051 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1052 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1055 if (!vectype)
1057 if (vect_print_dump_info (REPORT_SLP))
1059 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1060 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1063 return false;
1066 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1067 if (loop_vinfo)
1068 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1069 else
1070 /* No multitypes in BB SLP. */
1071 vectorization_factor = nunits;
1073 /* Calculate the unrolling factor. */
1074 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1075 if (unrolling_factor != 1 && !loop_vinfo)
1077 if (vect_print_dump_info (REPORT_SLP))
1078 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1079 " block SLP");
1081 return false;
1084 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1085 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1086 next = stmt;
1087 if (dr)
1089 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1090 while (next)
1092 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1093 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1096 else
1098 /* Collect reduction statements. */
1099 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1100 next);
1101 i++)
1103 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1104 if (vect_print_dump_info (REPORT_DETAILS))
1106 fprintf (vect_dump, "pushing reduction into node: ");
1107 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
1112 SLP_TREE_VEC_STMTS (node) = NULL;
1113 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1114 SLP_TREE_LEFT (node) = NULL;
1115 SLP_TREE_RIGHT (node) = NULL;
1116 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1117 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1119 /* Calculate the number of vector stmts to create based on the unrolling
1120 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1121 GROUP_SIZE / NUNITS otherwise. */
1122 ncopies_for_cost = unrolling_factor * group_size / nunits;
1124 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1125 loads = VEC_alloc (slp_tree, heap, group_size);
1127 /* Build the tree for the SLP instance. */
1128 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1129 &inside_cost, &outside_cost, ncopies_for_cost,
1130 &max_nunits, &load_permutation, &loads,
1131 vectorization_factor))
1133 /* Create a new SLP instance. */
1134 new_instance = XNEW (struct _slp_instance);
1135 SLP_INSTANCE_TREE (new_instance) = node;
1136 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1137 /* Calculate the unrolling factor based on the smallest type in the
1138 loop. */
1139 if (max_nunits > nunits)
1140 unrolling_factor = least_common_multiple (max_nunits, group_size)
1141 / group_size;
1143 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1144 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1145 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1146 SLP_INSTANCE_LOADS (new_instance) = loads;
1147 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1148 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1149 if (VEC_length (slp_tree, loads))
1151 if (!vect_supported_load_permutation_p (new_instance, group_size,
1152 load_permutation))
1154 if (vect_print_dump_info (REPORT_SLP))
1156 fprintf (vect_dump, "Build SLP failed: unsupported load "
1157 "permutation ");
1158 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1161 vect_free_slp_instance (new_instance);
1162 return false;
1165 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1166 = vect_find_first_load_in_slp_instance (new_instance);
1168 else
1169 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1171 if (loop_vinfo)
1172 VEC_safe_push (slp_instance, heap,
1173 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1174 new_instance);
1175 else
1176 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1177 new_instance);
1179 if (vect_print_dump_info (REPORT_SLP))
1180 vect_print_slp_tree (node);
1182 return true;
1185 /* Failed to SLP. */
1186 /* Free the allocated memory. */
1187 vect_free_slp_tree (node);
1188 VEC_free (int, heap, load_permutation);
1189 VEC_free (slp_tree, heap, loads);
1191 return false;
/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
   trees of packed scalar stmts if SLP is possible.
   Returns FALSE only for a basic-block (BB_VINFO) analysis in which no
   SLP instance could be built; for loop analysis it always returns TRUE,
   since loop vectorization can proceed without SLP.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    /* In BB SLP there are no reductions; only strided stores seed trees.  */
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  /* For a basic block, failing to build any instance means the whole
     BB vectorization attempt fails.  */
  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  At least two
     reduction stmts are needed to form a group; only the first one seeds
     the instance build.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  /* NOTE: for the loop case this returns TRUE even when no SLP instance
     was found — loop vectorization simply proceeds without SLP.  */
  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.
   Side effects: marks every stmt of each instance as PURE_SLP and records
   the maximum instance unrolling factor in LOOP_VINFO.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  No cost-based decision yet — every
         instance that was successfully built is accepted.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  /* A pure-SLP stmt whose SSA result is used by a non-SLP but relevant
     stmt (or by a vectorizable cycle def) must also be vectorized by the
     loop-based vectorizer — mark the whole group at index I as hybrid.
     Uses by reduction PHIs are exempt: the reduction epilogue handles
     them separately.  */
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                     == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  /* Recurse into both operand subtrees.  */
  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
1308 /* Find stmts that must be both vectorized and SLPed. */
1310 void
1311 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1313 unsigned int i;
1314 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1315 slp_instance instance;
1317 if (vect_print_dump_info (REPORT_SLP))
1318 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1320 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1321 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1325 /* Create and initialize a new bb_vec_info struct for BB, as well as
1326 stmt_vec_info structs for all the stmts in it. */
1328 static bb_vec_info
1329 new_bb_vec_info (basic_block bb)
1331 bb_vec_info res = NULL;
1332 gimple_stmt_iterator gsi;
1334 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1335 BB_VINFO_BB (res) = bb;
1337 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1339 gimple stmt = gsi_stmt (gsi);
1340 gimple_set_uid (stmt, 0);
1341 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1344 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1345 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1347 bb->aux = res;
1348 return res;
1352 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1353 stmts in the basic block. */
1355 static void
1356 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1358 basic_block bb;
1359 gimple_stmt_iterator si;
1361 if (!bb_vinfo)
1362 return;
1364 bb = BB_VINFO_BB (bb_vinfo);
1366 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1368 gimple stmt = gsi_stmt (si);
1369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1371 if (stmt_info)
1372 /* Free stmt_vec_info. */
1373 free_stmt_vec_info (stmt);
1376 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1377 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1378 free (bb_vinfo);
1379 bb->aux = NULL;
1383 /* Analyze statements contained in SLP tree node after recursively analyzing
1384 the subtree. Return TRUE if the operations are supported. */
1386 static bool
1387 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1389 bool dummy;
1390 int i;
1391 gimple stmt;
1393 if (!node)
1394 return true;
1396 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1397 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1398 return false;
1400 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1402 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1403 gcc_assert (stmt_info);
1404 gcc_assert (PURE_SLP_STMT (stmt_info));
1406 if (!vect_analyze_stmt (stmt, &dummy, node))
1407 return false;
1410 return true;
1414 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1415 operations are supported. */
1417 static bool
1418 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1420 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1421 slp_instance instance;
1422 int i;
1424 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1426 if (!vect_slp_analyze_node_operations (bb_vinfo,
1427 SLP_INSTANCE_TREE (instance)))
1429 vect_free_slp_instance (instance);
1430 VEC_ordered_remove (slp_instance, slp_instances, i);
1432 else
1433 i++;
1436 if (!VEC_length (slp_instance, slp_instances))
1437 return false;
1439 return true;
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   holding the analysis results on success; on any failure returns NULL
   after destroying the partially-built bb_vec_info.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count the "real" stmts: debug stmts, nops and labels don't count
     towards the size limit below.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  /* Bail out early on huge blocks to bound compile time.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Without at least one data dependence relation there is nothing for
     SLP to exploit in this block.  */
  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
1582 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1583 the number of created vector stmts depends on the unrolling factor). However,
1584 the actual number of vector stmts for every SLP node depends on VF which is
1585 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1586 In this function we assume that the inside costs calculated in
1587 vect_model_xxx_cost are linear in ncopies. */
1589 void
1590 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1592 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1593 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1594 slp_instance instance;
1596 if (vect_print_dump_info (REPORT_SLP))
1597 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1599 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1600 /* We assume that costs are linear in ncopies. */
1601 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1602 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      /* A reduction def with no reduction operand index: nothing to
         build here — release the output vector and bail out.  */
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      /* Gather the reduction operand instead of the requested one.  */
      op_num = reduc_index - 1;
      op = gimple_op (stmt, op_num + 1);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            /* Additive reductions: neutral element is 0 (or 0.0).  */
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
            /* Multiplicative reduction: neutral element is 1 (or 1.0).  */
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          case BIT_AND_EXPR:
            /* AND reduction: neutral element is all-ones.  */
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      /* Stores: the (invariant/constant) operand is always the RHS.  */
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  /* Choose between a VECTOR_CST (all elements constant) and a runtime
     CONSTRUCTOR (invariant SSA names) when packing below.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Iterate the group in reverse so the cons-list ends up in source
         order once built.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              /* Only the last copy carries the initial values; earlier
                 copies use the neutral element so the final reduction
                 is unaffected.  */
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* A full vector's worth of elements collected — emit it.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          /* Reductions are padded with neutral-element vectors.  */
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          /* Otherwise replicate the vectors built so far.  */
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}
1802 /* Get vectorized definitions from SLP_NODE that contains corresponding
1803 vectorized def-stmts. */
1805 static void
1806 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1808 tree vec_oprnd;
1809 gimple vec_def_stmt;
1810 unsigned int i;
1812 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1814 for (i = 0;
1815 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1816 i++)
1818 gcc_assert (vec_def_stmt);
1819 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1820 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node.  This is used when the second operand must remain scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  A store needs only the one (RHS) operand gathered above.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  /* A second operand exists only for binary RHS stmts, and only when the
     caller asked for it.  */
  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  Pad the VEC with NULLs up to its full length so
     VEC_replace below can write at arbitrary slots.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement: a call to the target's
         builtin_vec_perm with both inputs and the mask.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      /* Each successive copy reads STRIDE vectors further along the
         chain.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized (with the last permute created).  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   NOTE: this function keeps internal state in static locals across calls
   (NUMBER_OF_MASK_FIXES, MASK_FIXED, NEEDS_FIRST_VECTOR); it is therefore
   not reentrant and assumes it is called for mask elements strictly in
   order, resetting itself when INDEX reaches the last element of a mask.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  Already-collected elements of the current mask are
         shifted down too.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
2049 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2050 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2051 permute statements for SLP_NODE_INSTANCE. */
2052 bool
2053 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2054 gimple_stmt_iterator *gsi, int vf,
2055 slp_instance slp_node_instance, bool analyze_only)
2057 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058 tree mask_element_type = NULL_TREE, mask_type;
2059 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2060 slp_tree node;
2061 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2062 gimple next_scalar_stmt;
2063 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2064 int first_mask_element;
2065 int index, unroll_factor, *mask, current_mask_element, ncopies;
2066 bool only_one_vec = false, need_next_vector = false;
2067 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2069 if (!targetm.vectorize.builtin_vec_perm)
2071 if (vect_print_dump_info (REPORT_DETAILS))
2073 fprintf (vect_dump, "no builtin for vect permute for ");
2074 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2077 return false;
2080 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2081 &mask_element_type);
2082 if (!builtin_decl || !mask_element_type)
2084 if (vect_print_dump_info (REPORT_DETAILS))
2086 fprintf (vect_dump, "no builtin for vect permute for ");
2087 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2090 return false;
2093 mask_type = get_vectype_for_scalar_type (mask_element_type);
2094 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2095 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2096 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2097 scale = mask_nunits / nunits;
2098 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2100 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2101 unrolling factor. */
2102 orig_vec_stmts_num = group_size *
2103 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2104 if (orig_vec_stmts_num == 1)
2105 only_one_vec = true;
2107 /* Number of copies is determined by the final vectorization factor
2108 relatively to SLP_NODE_INSTANCE unrolling factor. */
2109 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2111 /* Generate permutation masks for every NODE. Number of masks for each NODE
2112 is equal to GROUP_SIZE.
2113 E.g., we have a group of three nodes with three loads from the same
2114 location in each node, and the vector size is 4. I.e., we have a
2115 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2116 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2117 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2120 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2121 scpecific type, e.g., in bytes for Altivec.
2122 The last mask is illegal since we assume two operands for permute
2123 operation, and the mask element values can't be outside that range. Hence,
2124 the last mask must be converted into {2,5,5,5}.
2125 For the first two permutations we need the first and the second input
2126 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2127 we need the second and the third vectors: {b1,c1,a2,b2} and
2128 {c2,a3,b3,c3}. */
2130 for (i = 0;
2131 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2132 i, node);
2133 i++)
2135 scalar_index = 0;
2136 index = 0;
2137 vect_stmts_counter = 0;
2138 vec_index = 0;
2139 first_vec_index = vec_index++;
2140 if (only_one_vec)
2141 second_vec_index = first_vec_index;
2142 else
2143 second_vec_index = vec_index++;
2145 for (j = 0; j < unroll_factor; j++)
2147 for (k = 0; k < group_size; k++)
2149 first_mask_element = (i + j * group_size) * scale;
2150 for (m = 0; m < scale; m++)
2152 if (!vect_get_mask_element (stmt, first_mask_element, m,
2153 mask_nunits, only_one_vec, index, mask,
2154 &current_mask_element, &need_next_vector))
2155 return false;
2157 mask[index++] = current_mask_element;
2160 if (index == mask_nunits)
2162 tree mask_vec = NULL;
2164 while (--index >= 0)
2166 tree t = build_int_cst (mask_element_type, mask[index]);
2167 mask_vec = tree_cons (NULL, t, mask_vec);
2169 mask_vec = build_vector (mask_type, mask_vec);
2170 index = 0;
2172 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2173 mask_vec))
2175 if (vect_print_dump_info (REPORT_DETAILS))
2177 fprintf (vect_dump, "unsupported vect permute ");
2178 print_generic_expr (vect_dump, mask_vec, 0);
2180 free (mask);
2181 return false;
2184 if (!analyze_only)
2186 if (need_next_vector)
2188 first_vec_index = second_vec_index;
2189 second_vec_index = vec_index;
2192 next_scalar_stmt = VEC_index (gimple,
2193 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2195 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2196 mask_vec, first_vec_index, second_vec_index,
2197 gsi, node, builtin_decl, vectype, dr_chain,
2198 ncopies, vect_stmts_counter++);
2205 free (mask);
2206 return true;
2211 /* Vectorize SLP instance tree in postorder. */
2213 static bool
2214 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2215 unsigned int vectorization_factor)
2217 gimple stmt;
2218 bool strided_store, is_store;
2219 gimple_stmt_iterator si;
2220 stmt_vec_info stmt_info;
2221 unsigned int vec_stmts_size, nunits, group_size;
2222 tree vectype;
2223 int i;
2224 slp_tree loads_node;
2226 if (!node)
2227 return false;
2229 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2230 vectorization_factor);
2231 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2232 vectorization_factor);
2234 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2235 stmt_info = vinfo_for_stmt (stmt);
2237 /* VECTYPE is the type of the destination. */
2238 vectype = STMT_VINFO_VECTYPE (stmt_info);
2239 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2240 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2242 /* For each SLP instance calculate number of vector stmts to be created
2243 for the scalar stmts in each node of the SLP tree. Number of vector
2244 elements in one vector iteration is the number of scalar elements in
2245 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2246 size. */
2247 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2249 /* In case of load permutation we have to allocate vectorized statements for
2250 all the nodes that participate in that permutation. */
2251 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2253 for (i = 0;
2254 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2255 i++)
2257 if (!SLP_TREE_VEC_STMTS (loads_node))
2259 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2260 vec_stmts_size);
2261 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2266 if (!SLP_TREE_VEC_STMTS (node))
2268 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2269 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2272 if (vect_print_dump_info (REPORT_DETAILS))
2274 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2275 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2278 /* Loads should be inserted before the first load. */
2279 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2280 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2281 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2282 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2283 else
2284 si = gsi_for_stmt (stmt);
2286 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2287 return is_store;
2291 bool
2292 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2294 VEC (slp_instance, heap) *slp_instances;
2295 slp_instance instance;
2296 unsigned int i, vf;
2297 bool is_store = false;
2299 if (loop_vinfo)
2301 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2302 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2304 else
2306 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2307 vf = 1;
2310 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2312 /* Schedule the tree of INSTANCE. */
2313 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2314 instance, vf);
2315 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2316 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2317 fprintf (vect_dump, "vectorizing stmts using SLP.");
2320 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2322 slp_tree root = SLP_INSTANCE_TREE (instance);
2323 gimple store;
2324 unsigned int j;
2325 gimple_stmt_iterator gsi;
2327 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2328 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2330 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2331 break;
2333 /* Free the attached stmt_vec_info and remove the stmt. */
2334 gsi = gsi_for_stmt (store);
2335 gsi_remove (&gsi, true);
2336 free_stmt_vec_info (store);
2340 return is_store;
2344 /* Vectorize the basic block. */
2346 void
2347 vect_slp_transform_bb (basic_block bb)
2349 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2350 gimple_stmt_iterator si;
2352 gcc_assert (bb_vinfo);
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 fprintf (vect_dump, "SLPing BB\n");
2357 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2359 gimple stmt = gsi_stmt (si);
2360 stmt_vec_info stmt_info;
2362 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "------>SLPing statement: ");
2365 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2368 stmt_info = vinfo_for_stmt (stmt);
2369 gcc_assert (stmt_info);
2371 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2372 if (STMT_SLP_TYPE (stmt_info))
2374 vect_schedule_slp (NULL, bb_vinfo);
2375 break;
2379 mark_sym_for_renaming (gimple_vop (cfun));
2380 /* The memory tags and pointers in vectorized statements need to
2381 have their SSA forms updated. FIXME, why can't this be delayed
2382 until all the loops have been transformed? */
2383 update_ssa (TODO_update_ssa);
2385 if (vect_print_dump_info (REPORT_DETAILS))
2386 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2388 destroy_bb_vec_info (bb_vinfo);