gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement that has one, and
   UNKNOWN_LOC otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

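/* A note on the tree shape used throughout this file (an illustrative
   summary; see vect_build_slp_tree below): every slp_tree node is binary.
   SLP_TREE_SCALAR_STMTS holds one isomorphic scalar stmt per group member,
   and SLP_TREE_LEFT/SLP_TREE_RIGHT hold the nodes built for the definitions
   of operand 0 and operand 1 of those stmts.  For a group of two stmts

       a0 = b0 + c0;
       a1 = b1 + c1;

   the node holds {a0 = ..., a1 = ...}, its left child the defs of {b0, b1},
   and its right child the defs of {c0, c1}.  */
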
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}

/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group, check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

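/* An illustrative example of the def matching above (assumed input, not
   from this file): for the group

       x0 = y0 << 2;
       x1 = y1 << 2;

   both stmts get dt[0] == vect_internal_def with compatible types for Y0
   and Y1, and the constant 2 of the second stmt is type-compatible with the
   FIRST_STMT_CONST_OPRND recorded for the first stmt, so the defs match.
   Had the second stmt been y1 << 3UL, the constant-type check would reject
   the group with "different types".  */
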
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || (optab->handlers[(int) vec_mode].insn_code
                      == CODE_FOR_nothing))
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Loads that are not strided are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm)
               * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}

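/* A minimal sketch of what the recursion above builds, for an assumed
   basic block

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];

   starting from the node of the two strided stores, the additions become
   the left child (the defs of the stores' only rhs operand), and the
   strided loads from B and C become that child's left and right children.
   Each load sits at its own place in its interleaving chain (0 and 1, in
   order), so no permutation is recorded, and STOP_RECURSION ends the tree
   at the load nodes.  */
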
/* Dump the SLP tree rooted at NODE, one line per scalar stmt, to
   vect_dump.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node;
  gimple stmt;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
       i++)
    if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
        != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

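/* A worked example of the only permutation accepted above (assumed
   numbers): for GROUP_SIZE == 2 the permutation must have 2 * 2 == 4
   indices of the form {0, 0, 1, 1} - within each sequence of GROUP_SIZE
   entries all indices are equal, and the LOAD_INDEX sbitmap checks that
   every index in 0..GROUP_SIZE-1 is used exactly once.  A permutation such
   as {0, 1, 0, 1} fails the equal-within-a-sequence test and is
   rejected.  */
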
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
       i++)
    for (j = 0;
         VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
         j++)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (dr)
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (dr)
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "pushing reduction into node: ");
              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
            }
        }
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

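/* The unrolling arithmetic above on assumed numbers: with NUNITS == 4
   (e.g. V4SI) and GROUP_SIZE == 2, UNROLLING_FACTOR
   == least_common_multiple (4, 2) / 2 == 2, i.e. two copies of the group
   fill one vector, and NCOPIES_FOR_COST == 2 * 2 / 4 == 1.  In basic block
   SLP there is no loop to unroll, which is why an unrolling factor other
   than 1 fails the build above.  */
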
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}

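/* An assumed example of a stmt that becomes HYBRID: if a PURE_SLP stmt

       s0 = x0 + y0;

   has an immediate use in a relevant stmt that belongs to no SLP node
   (e.g. a scalar accumulation of S0), the use has no STMT_SLP_TYPE, so the
   node is marked HYBRID at S0's index and the stmt is also handled by
   loop-based vectorization.  */
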
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check whether the basic block can be vectorized.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

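/* An assumed example of a basic block that survives all the checks above:

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];
       a[2] = b[2] + c[2];
       a[3] = b[3] + c[3];

   a group of four strided stores with isomorphic operations, supported
   alignment and no dependences, for which vect_analyze_slp builds a single
   SLP instance and a bb_vec_info is returned.  */
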
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

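/* Example of the scaling above with assumed numbers: an instance analyzed
   with SLP_INSTANCE_UNROLLING_FACTOR == 2 in a loop that later gets
   VF == 4 creates twice as many vector stmts as were costed, so its inside
   cost is multiplied by 4 / 2 == 2.  */
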
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      op_num = reduc_index - 1;
      op = gimple_op (stmt, op_num + 1);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
        case WIDEN_SUM_EXPR:
        case DOT_PROD_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
            neutral_op = build_real (TREE_TYPE (op), dconst0);
          else
            neutral_op = build_int_cst (TREE_TYPE (op), 0);

          break;

        case MULT_EXPR:
        case BIT_AND_EXPR:
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
            neutral_op = build_real (TREE_TYPE (op), dconst1);
          else
            neutral_op = build_int_cst (TREE_TYPE (op), 1);

          break;

        default:
          neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}

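/* Illustrative values for the replication above (assumed example): for a
   PLUS_EXPR reduction on a four-element integer vector type, the neutral
   vector pushed for extra copies is {0, 0, 0, 0}, which leaves the
   reduction result unchanged; for MULT_EXPR it would be {1, 1, 1, 1}.
   When there is no neutral op, the already created vectors are repeated
   instead.  */
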
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
       i++)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}

/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain
   scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}

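/* A sketch of how this function is used (assumed caller, simplified): for
   a node holding the group

       a0 = b0 + c0;
       a1 = b1 + c1;

   the caller gets VEC_OPRNDS0 from the already vectorized LEFT node (the
   defs of B0/B1) and VEC_OPRNDS1 from the RIGHT node (the defs of C0/C1);
   if the Cs were loop invariants, VEC_OPRNDS1 would instead be built by
   vect_get_constant_vectors above.  */
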
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}



/* Given FIRST_MASK_ELEMENT, a mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;
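
  /* Note that these function-local statics carry state across consecutive
     calls while a single mask is being filled; they are reset below once
     the last element of the current mask has been produced, so each new
     mask starts from a clean state.  */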

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
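
  /* Illustrative example with hypothetical values: with MASK_NUNITS == 4
     and one mask fix already applied (NUMBER_OF_MASK_FIXES == 2), a
     requested element 9 becomes 9 - 4 == 5, i.e., element 1 of what is now
     the second input vector.  */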

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values
     in the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to
             the next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working
         with the second and the third - we need to adjust the values of
         the mask accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      number_of_mask_fixes++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
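
  /* E.g., on a target whose permute masks are byte vectors (such as
     Altivec), a V4SI vector (NUNITS == 4) gets a V16QI mask
     (MASK_NUNITS == 16), giving SCALE == 4 mask elements per vector
     element.  */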

  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate, based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
  orig_vec_stmts_num
    = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
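
  /* Illustrative example with hypothetical values: with a final
     vectorization factor of 8 and an SLP unrolling factor of 2,
     NCOPIES == 4, i.e., the permutations are replicated four times along
     the unrolled loop.  */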

  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for the permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
                    i, node);
       i++)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
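              /* Continuing the example above: for the a's node (i == 0)
                 with GROUP_SIZE == 3 and SCALE == 1, each unroll iteration
                 J contributes GROUP_SIZE copies of element J * 3, giving
                 the sequence 0,0,0,3,3,3,6,6,6,9,9,9 that is split into
                 the masks {0,0,0,3} {3,3,6,6} {6,9,9,9} shown above.  */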
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                mask_nunits, only_one_vec, index, mask,
                                &current_mask_element, &need_next_vector))
                    {
                      /* Free MASK so this early exit does not leak it.  */
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }

                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt
                        = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node),
                                     scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                mask_vec, first_vec_index, second_vec_index,
                                gsi, node, builtin_decl, vectype, dr_chain,
                                ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}


/* Vectorize the SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be
     created for the scalar stmts in each node of the SLP tree.  The number
     of vector elements in one vector iteration is the number of scalar
     elements in one scalar iteration (GROUP_SIZE) multiplied by VF and
     divided by the vector size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
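
  /* Illustrative example with hypothetical values: with GROUP_SIZE == 4,
     VECTORIZATION_FACTOR == 4 and NUNITS == 4, each node gets
     (4 * 4) / 4 == 4 vector stmts.  */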

  /* In case of load permutation we have to allocate vectorized statements
     for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance),
                        i, loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}


/* Generate vectorized code for all the SLP instances in LOOP_VINFO
   (when vectorizing a loop) or BB_VINFO (when vectorizing a basic
   block).  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
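      /* There is no loop to unroll when vectorizing a basic block, so the
         vectorization factor is simply 1.  */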
      vf = 1;
    }

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME: why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}