/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
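
/* A minimal illustration (not from the original sources) of the kind of
   basic block this file vectorizes: a group of strided stores whose
   right-hand sides are isomorphic operations, e.g.

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];
       a[2] = b[2] + c[2];
       a[3] = b[3] + c[3];

   Starting from the store group, vect_analyze_slp_instance builds an SLP
   tree whose root holds the four stores and whose children hold the
   definitions of their operands; the whole group is then replaced by
   single vector statements.  */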

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds, and UNKNOWN_LOC
   otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}

/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only.  */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group, check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}
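
/* Illustrative sketch (not in the original file): given two grouped stmts

       t0 = b0 + x;
       t1 = b1 + x;

   the first stmt records operand 0 as an internal def (B0's defining stmt)
   and operand 1 as an external def of X's type; the second stmt must then
   match those def types and have compatible operand types, or the SLP
   build fails with "different types".  */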

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                              "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we keep them in
             LOADS for further check in vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
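
/* Illustrative sketch (not in the original file): for a store group

       a[0] = t0;   t0 = x[0] + y[0];
       a[1] = t1;   t1 = x[1] + y[1];

   the root node holds the two stores, its left child holds the two
   additions (the internal defs of t0/t1), and the recursion stops at the
   children of the additions because x[] and y[] form groups of loads.  */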

/* Print the SLP tree rooted at NODE to the vectorizer dump file.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                  "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance.  Not all the loads belong to the same node or interleaving
     group, hence we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        /* Free LOAD_INDEX before bailing out so it is not leaked.  */
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
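
/* Illustrative example (not in the original file): for GROUP_SIZE 2 and two
   load nodes, LOAD_PERMUTATION {0, 0, 1, 1} matches the supported
   0..01..1.. pattern above (each node reads a single location of the
   interleaving chain), while {0, 1, 1, 0} would be rejected.  */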

/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (dr)
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
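  /* Worked example (illustrative, not in the original sources): with
     NUNITS == 4 and GROUP_SIZE == 6, least_common_multiple (4, 6) == 12,
     so UNROLLING_FACTOR == 12 / 6 == 2: unrolling the group twice fills
     three whole vectors.  */
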
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                            " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (dr)
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "pushing reduction into node: ");
              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
            }
        }
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                                      "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if loads and stores are mixed in the basic block (in that
   case if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE in case there is a statement marked as
   not vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow unanalyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
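
/* Illustrative example (not in the original file): if the loop's VF is 8
   and an instance's unrolling factor is 2, each SLP node actually emits
   8/2 = 4 times the vector stmts assumed at analysis time, so its inside
   cost is scaled by that factor above.  */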
1809 /* For constant and loop invariant defs of SLP_NODE this function returns
1810 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1811 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1812 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1813 REDUC_INDEX is the index of the reduction operand in the statements, unless
1814 it is -1. */
1816 static void
1817 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1818 unsigned int op_num, unsigned int number_of_vectors,
1819 int reduc_index)
1821 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1822 gimple stmt = VEC_index (gimple, stmts, 0);
1823 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1824 int nunits;
1825 tree vec_cst;
1826 tree t = NULL_TREE;
1827 int j, number_of_places_left_in_vector;
1828 tree vector_type;
1829 tree op, vop;
1830 int group_size = VEC_length (gimple, stmts);
1831 unsigned int vec_num, i;
1832 int number_of_copies = 1;
1833 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1834 bool constant_p, is_store;
1835 tree neutral_op = NULL;
1837 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1839 enum tree_code code = gimple_assign_rhs_code (stmt);
1840 if (reduc_index == -1)
1842 VEC_free (tree, heap, *vec_oprnds);
1843 return;
1846 op_num = reduc_index - 1;
1847 op = gimple_op (stmt, op_num + 1);
1848 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1849 we need either neutral operands or the original operands. See
1850 get_initial_def_for_reduction() for details. */
1851 switch (code)
1853 case WIDEN_SUM_EXPR:
1854 case DOT_PROD_EXPR:
1855 case PLUS_EXPR:
1856 case MINUS_EXPR:
1857 case BIT_IOR_EXPR:
1858 case BIT_XOR_EXPR:
1859 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1860 neutral_op = build_real (TREE_TYPE (op), dconst0);
1861 else
1862 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1864 break;
1866 case MULT_EXPR:
1867 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1868 neutral_op = build_real (TREE_TYPE (op), dconst1);
1869 else
1870 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1872 break;
1874 case BIT_AND_EXPR:
1875 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1876 break;
1878 default:
1879 neutral_op = NULL;
1883 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1885 is_store = true;
1886 op = gimple_assign_rhs1 (stmt);
1888 else
1890 is_store = false;
1891 op = gimple_op (stmt, op_num + 1);
1894 if (CONSTANT_CLASS_P (op))
1895 constant_p = true;
1896 else
1897 constant_p = false;
1899 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1900 gcc_assert (vector_type);
1902 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1904 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1905 created vectors. It is greater than 1 if unrolling is performed.
1907 For example, we have two scalar operands, s1 and s2 (e.g., group of
1908 strided accesses of size two), while NUNITS is four (i.e., four scalars
1909 of this type can be packed in a vector). The output vector will contain
1910 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1911 will be 2).
1913 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1914 containing the operands.
1916 For example, NUNITS is four as before, and the group size is 8
1917 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1918 {s5, s6, s7, s8}. */
1920 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);
          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst,
                                                vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* If VF is greater than the unrolling factor needed for the SLP group of
     stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}
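
/* A minimal self-contained sketch of the net effect of the loops above
   (a hypothetical helper, compiled out; it is not part of the vectorizer).
   The reverse iteration over STMTS and the final inversion of VOPRNDS
   cancel out, so slot S of the flattened sequence of created vectors
   simply holds scalar S % GROUP_SIZE of the group, repeated
   NUMBER_OF_COPIES times.  */
#if 0
static void
sketch_constant_vector_layout (int nunits, int group_size,
                               int number_of_copies, int *out)
{
  /* GROUP_SIZE * NUMBER_OF_COPIES is a multiple of NUNITS by
     construction of NUMBER_OF_COPIES.  */
  int s, total = group_size * number_of_copies;

  for (s = 0; s < total; s++)
    /* Scalar index landing in vector S / NUNITS, lane S % NUNITS.  */
    out[s] = s % group_size;
}
#endif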

/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}

/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized
   stmts must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand
   (from the right node).  This is used when the second operand must remain
   scalar.  */

void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}
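
/* An illustrative, compiled-out call pattern for the function above (the
   wrapper and its locals are hypothetical): a non-reduction binary
   operation requests vectorized defs for both operands, while passing
   VEC_OPRNDS1 == NULL would keep the second operand scalar.  */
#if 0
static void
sketch_get_defs_for_binary_op (slp_tree slp_node)
{
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;

  /* REDUC_INDEX == -1 means this is not a reduction.  */
  vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1, -1);
}
#endif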

/* Create NCOPIES permutation statements using the mask MASK (by building
   a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE,
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC (tree, heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
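
  /* For illustration (hypothetical numbers): if NODE has 6 vectorized
     stmts and NCOPIES == 2, STRIDE == 3; copy 0 permutes
     DR_CHAIN[FIRST_VEC_INDX] with DR_CHAIN[SECOND_VEC_INDX], and copy 1
     the entries 3 positions further on.  */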

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}

/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values
     in the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to
             the next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
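
/* A worked example of the adjustment above (illustrative values, matching
   the mask example in vect_transform_slp_perm_load below): with
   MASK_NUNITS == 4 and the element sequence 6, 9, 9, 9, element 6 fits in
   the first two input vectors, but 9 >= 2 * 4 forces the move to the next
   vector: 9 becomes 9 - 4 == 5, the already-recorded 6 becomes 2, and
   NEED_NEXT_VECTOR is set, yielding the final mask {2, 5, 5, 5}.  */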

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance,
                              bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num,
      vect_stmts_counter;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
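
  /* E.g., for a V4SI vector type and a byte-element permute mask in the
     style of Altivec vperm (an assumed target layout): NUNITS == 4,
     MASK_NUNITS == 16, so SCALE == 4 and vector element I corresponds to
     mask bytes 4*I .. 4*I+3.  */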
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
     unrolling factor.  */
  orig_vec_stmts_num = group_size *
             SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relative to SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
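
  /* With illustrative numbers: GROUP_SIZE == 3 loads, NUNITS == 4 and an
     unrolling factor of 4 give ORIG_VEC_STMTS_NUM == 3 * 4 / 4 == 3 vector
     stmts per node; if the final VF is 8, NCOPIES == 8 / 4 == 2.  */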

  /* Generate permutation masks for every NODE.  Number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                              mask_nunits, only_one_vec,
                                              index, mask,
                                              &current_mask_element,
                                              &need_next_vector))
                    {
                      /* Release the mask buffer on this early exit too.  */
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                mask_vec, first_vec_index, second_vec_index,
                                gsi, node, builtin_decl, vectype, dr_chain,
                                ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
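
  /* For example (illustrative numbers only): a group of two scalar stmts,
     VECTORIZATION_FACTOR == 4 and NUNITS == 4 give
     VEC_STMTS_SIZE == (4 * 2) / 4 == 2 vector stmts per node.  */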

  /* In case of load permutation we have to allocate vectorized statements
     for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i,
                        loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}

/* Generate vectorized code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}

/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}