gcc/tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if one is found and UNKNOWN_LOC if not. */
45 LOC
46 find_bb_location (basic_block bb)
48 gimple stmt = NULL;
49 gimple_stmt_iterator si;
51 if (!bb)
52 return UNKNOWN_LOC;
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 stmt = gsi_stmt (si);
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
61 return UNKNOWN_LOC;
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 static void
68 vect_free_slp_tree (slp_tree node)
70 if (!node)
71 return;
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
84 free (node);
88 /* Free the memory allocated for the SLP instance. */
90 void
91 vect_free_slp_instance (slp_instance instance)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
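/* For illustration (a hypothetical group of size two), in

     a[0] = b[0] + c[0];
     a[1] = b[1] + x;

   where b and c are defined inside the loop and x is loop-invariant, the
   def types of the second operand differ (vect_internal_def vs.
   vect_external_def), so the checks below reject the group.  */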
103 static bool
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
116 tree oprnd;
117 unsigned int i, number_of_oprnds;
118 tree def;
119 gimple def_stmt;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
126 if (loop_vinfo)
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 &dt[i])
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
146 return false;
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
151 pattern. */
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
157 if (!*first_stmt_dt0)
158 *pattern0 = true;
159 else
161 if (i == 1 && !*first_stmt_dt1)
162 *pattern1 = true;
163 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
165 if (vect_print_dump_info (REPORT_DETAILS))
167 fprintf (vect_dump, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
172 return false;
176 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
177 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
179 if (*dt == vect_unknown_def_type)
181 if (vect_print_dump_info (REPORT_DETAILS))
182 fprintf (vect_dump, "Unsupported pattern.");
183 return false;
186 switch (gimple_code (def_stmt))
188 case GIMPLE_PHI:
189 def = gimple_phi_result (def_stmt);
190 break;
192 case GIMPLE_ASSIGN:
193 def = gimple_assign_lhs (def_stmt);
194 break;
196 default:
197 if (vect_print_dump_info (REPORT_DETAILS))
198 fprintf (vect_dump, "unsupported defining stmt: ");
199 return false;
203 if (!*first_stmt_dt0)
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0 = dt[i];
207 if (def)
208 *first_stmt_def0_type = TREE_TYPE (def);
209 else
210 *first_stmt_const_oprnd = oprnd;
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class != GIMPLE_SINGLE_RHS)
214 /* Not a memory operation (we don't call this function for loads). */
215 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
216 else
217 /* Store. */
218 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
221 else
223 if (!*first_stmt_dt1 && i == 1)
225 /* op1 of the first stmt of the group - store its info. */
226 *first_stmt_dt1 = dt[i];
227 if (def)
228 *first_stmt_def1_type = TREE_TYPE (def);
229 else
231 /* We assume that the stmt contains only one constant
232 operand. We fail otherwise, to be on the safe side. */
233 if (*first_stmt_const_oprnd)
235 if (vect_print_dump_info (REPORT_SLP))
236 fprintf (vect_dump, "Build SLP failed: two constant "
237 "oprnds in stmt");
238 return false;
240 *first_stmt_const_oprnd = oprnd;
243 else
245 /* Not first stmt of the group, check that the def-stmt/s match
246 the def-stmt/s of the first stmt. */
247 if ((i == 0
248 && (*first_stmt_dt0 != dt[i]
249 || (*first_stmt_def0_type && def
250 && !types_compatible_p (*first_stmt_def0_type,
251 TREE_TYPE (def)))))
252 || (i == 1
253 && (*first_stmt_dt1 != dt[i]
254 || (*first_stmt_def1_type && def
255 && !types_compatible_p (*first_stmt_def1_type,
256 TREE_TYPE (def)))))
257 || (!def
258 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
259 TREE_TYPE (oprnd))))
261 if (vect_print_dump_info (REPORT_SLP))
262 fprintf (vect_dump, "Build SLP failed: different types ");
264 return false;
269 /* Check the types of the definitions. */
270 switch (dt[i])
272 case vect_constant_def:
273 case vect_external_def:
274 break;
276 case vect_internal_def:
277 case vect_reduction_def:
278 if (i == 0)
279 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
280 else
281 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
282 break;
284 default:
285 /* FORNOW: Not supported. */
286 if (vect_print_dump_info (REPORT_SLP))
288 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
289 print_generic_expr (vect_dump, def, TDF_SLIM);
292 return false;
296 return true;
300 /* Recursively build an SLP tree starting from NODE.
301 Fail (and return FALSE) if the def-stmts are not isomorphic, require a
302 data permutation, or use unsupported operations. Otherwise, return
303 TRUE. */
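/* For illustration, a group like

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   is isomorphic: both stmts use PLUS_EXPR on compatible types.  Mixing,
   say, PLUS_EXPR and MINUS_EXPR in one group fails below with
   "different operation in stmt".  */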
305 static bool
306 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
307 slp_tree *node, unsigned int group_size,
308 int *inside_cost, int *outside_cost,
309 int ncopies_for_cost, unsigned int *max_nunits,
310 VEC (int, heap) **load_permutation,
311 VEC (slp_tree, heap) **loads,
312 unsigned int vectorization_factor)
314 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
316 unsigned int i;
317 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
318 gimple stmt = VEC_index (gimple, stmts, 0);
319 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
320 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
321 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
322 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
323 tree lhs;
324 bool stop_recursion = false, need_same_oprnds = false;
325 tree vectype, scalar_type, first_op1 = NULL_TREE;
326 unsigned int ncopies;
327 optab optab;
328 int icode;
329 enum machine_mode optab_op2_mode;
330 enum machine_mode vec_mode;
331 tree first_stmt_const_oprnd = NULL_TREE;
332 struct data_reference *first_dr;
333 bool pattern0 = false, pattern1 = false;
334 HOST_WIDE_INT dummy;
335 bool permutation = false;
336 unsigned int load_place;
337 gimple first_load, prev_first_load = NULL;
339 /* For every stmt in NODE find its def stmt/s. */
340 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
342 if (vect_print_dump_info (REPORT_SLP))
344 fprintf (vect_dump, "Build SLP for ");
345 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
348 /* Fail to vectorize statements marked as unvectorizable. */
349 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
351 if (vect_print_dump_info (REPORT_SLP))
353 fprintf (vect_dump,
354 "Build SLP failed: unvectorizable statement ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
358 return false;
361 lhs = gimple_get_lhs (stmt);
362 if (lhs == NULL_TREE)
364 if (vect_print_dump_info (REPORT_SLP))
366 fprintf (vect_dump,
367 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
368 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
371 return false;
374 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
375 vectype = get_vectype_for_scalar_type (scalar_type);
376 if (!vectype)
378 if (vect_print_dump_info (REPORT_SLP))
380 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
381 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
383 return false;
386 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
387 if (ncopies != 1)
389 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "SLP with multiple types ");
392 /* FORNOW: multiple types are unsupported in BB SLP. */
393 if (bb_vinfo)
394 return false;
397 /* In case of multiple types we need to detect the smallest type. */
398 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
399 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
401 if (is_gimple_call (stmt))
402 rhs_code = CALL_EXPR;
403 else
404 rhs_code = gimple_assign_rhs_code (stmt);
406 /* Check the operation. */
407 if (i == 0)
409 first_stmt_code = rhs_code;
411 /* Shift arguments should be equal in all the packed stmts for a
412 vector shift with scalar shift operand. */
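/* E.g., if the target only provides a vector/scalar shift, the group

     a[0] = b[0] << 2;
     a[1] = b[1] << 2;

   is fine, while shifting by 2 in one stmt and by 3 in the other is
   rejected, because a single scalar operand shifts the whole vector.  */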
413 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
414 || rhs_code == LROTATE_EXPR
415 || rhs_code == RROTATE_EXPR)
417 vec_mode = TYPE_MODE (vectype);
419 /* First see if we have a vector/vector shift. */
420 optab = optab_for_tree_code (rhs_code, vectype,
421 optab_vector);
423 if (!optab
424 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
426 /* No vector/vector shift, try for a vector/scalar shift. */
427 optab = optab_for_tree_code (rhs_code, vectype,
428 optab_scalar);
430 if (!optab)
432 if (vect_print_dump_info (REPORT_SLP))
433 fprintf (vect_dump, "Build SLP failed: no optab.");
434 return false;
436 icode = (int) optab_handler (optab, vec_mode);
437 if (icode == CODE_FOR_nothing)
439 if (vect_print_dump_info (REPORT_SLP))
440 fprintf (vect_dump, "Build SLP failed: "
441 "op not supported by target.");
442 return false;
444 optab_op2_mode = insn_data[icode].operand[2].mode;
445 if (!VECTOR_MODE_P (optab_op2_mode))
447 need_same_oprnds = true;
448 first_op1 = gimple_assign_rhs2 (stmt);
453 else
455 if (first_stmt_code != rhs_code
456 && (first_stmt_code != IMAGPART_EXPR
457 || rhs_code != REALPART_EXPR)
458 && (first_stmt_code != REALPART_EXPR
459 || rhs_code != IMAGPART_EXPR))
461 if (vect_print_dump_info (REPORT_SLP))
463 fprintf (vect_dump,
464 "Build SLP failed: different operation in stmt ");
465 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
468 return false;
471 if (need_same_oprnds
472 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
474 if (vect_print_dump_info (REPORT_SLP))
476 fprintf (vect_dump,
477 "Build SLP failed: different shift arguments in ");
478 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
481 return false;
485 /* Strided store or load. */
486 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
488 if (REFERENCE_CLASS_P (lhs))
490 /* Store. */
491 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
492 stmt, &def_stmts0, &def_stmts1,
493 &first_stmt_dt0,
494 &first_stmt_dt1,
495 &first_stmt_def0_type,
496 &first_stmt_def1_type,
497 &first_stmt_const_oprnd,
498 ncopies_for_cost,
499 &pattern0, &pattern1))
500 return false;
502 else
504 /* Load. */
505 /* FORNOW: Check that there is no gap between the loads. */
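/* E.g., a group that loads a[0] and a[2] but not a[1] is rejected:
   the gap of a[2] from the previous access is 2 rather than 1, so the
   check below fails.  */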
506 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
507 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
508 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
509 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
511 if (vect_print_dump_info (REPORT_SLP))
513 fprintf (vect_dump, "Build SLP failed: strided "
514 "loads have gaps ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
518 return false;
521 /* Check that the size of interleaved loads group is not
522 greater than the SLP group size. */
523 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
525 if (vect_print_dump_info (REPORT_SLP))
527 fprintf (vect_dump, "Build SLP failed: the number of "
528 "interleaved loads is greater than"
529 " the SLP group size ");
530 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
533 return false;
536 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
537 if (prev_first_load)
539 /* Check that there are no loads from different interleaving
540 chains in the same node. The only exception is complex
541 numbers. */
542 if (prev_first_load != first_load
543 && rhs_code != REALPART_EXPR
544 && rhs_code != IMAGPART_EXPR)
546 if (vect_print_dump_info (REPORT_SLP))
548 fprintf (vect_dump, "Build SLP failed: different "
549 "interleaving chains in one node ");
550 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
553 return false;
556 else
557 prev_first_load = first_load;
559 if (first_load == stmt)
561 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
562 if (vect_supportable_dr_alignment (first_dr, false)
563 == dr_unaligned_unsupported)
565 if (vect_print_dump_info (REPORT_SLP))
567 fprintf (vect_dump, "Build SLP failed: unsupported "
568 "unaligned load ");
569 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
572 return false;
575 /* Analyze costs (for the first stmt in the group). */
576 vect_model_load_cost (vinfo_for_stmt (stmt),
577 ncopies_for_cost, *node);
580 /* Store the place of this load in the interleaving chain. If a
581 permutation is needed, we later decide whether that specific
582 permutation is supported. */
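/* E.g., if the node's stmts load a[1] and a[0], in that order, from a
   chain starting at a[0], the recorded places are 1 and 0; LOAD_PLACE
   differs from I for both stmts, so PERMUTATION is set.  */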
583 load_place = vect_get_place_in_interleaving_chain (stmt,
584 first_load);
585 if (load_place != i)
586 permutation = true;
588 VEC_safe_push (int, heap, *load_permutation, load_place);
590 /* We stop building the tree when we reach a group of loads. */
591 stop_recursion = true;
592 continue;
594 } /* Strided access. */
595 else
597 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
599 /* Not a strided load. */
600 if (vect_print_dump_info (REPORT_SLP))
602 fprintf (vect_dump, "Build SLP failed: not strided load ");
603 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
606 /* FORNOW: Non-strided loads are not supported. */
607 return false;
610 /* Not a memory operation. */
611 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
612 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
614 if (vect_print_dump_info (REPORT_SLP))
616 fprintf (vect_dump, "Build SLP failed: operation");
617 fprintf (vect_dump, " unsupported ");
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 return false;
624 /* Find the def-stmts. */
625 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
626 &def_stmts0, &def_stmts1,
627 &first_stmt_dt0, &first_stmt_dt1,
628 &first_stmt_def0_type,
629 &first_stmt_def1_type,
630 &first_stmt_const_oprnd,
631 ncopies_for_cost,
632 &pattern0, &pattern1))
633 return false;
637 /* Add the costs of the node to the overall instance costs. */
638 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
639 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
641 /* Strided loads were reached - stop the recursion. */
642 if (stop_recursion)
644 if (permutation)
646 VEC_safe_push (slp_tree, heap, *loads, *node);
647 *inside_cost
648 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
649 * group_size;
651 else
653 /* We don't check complex-number chains here, so we keep them in
654 LOADS for further check in vect_supported_load_permutation_p. */
655 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
656 VEC_safe_push (slp_tree, heap, *loads, *node);
659 return true;
662 /* Create SLP_TREE nodes for the definition node/s. */
663 if (first_stmt_dt0 == vect_internal_def)
665 slp_tree left_node = XNEW (struct _slp_tree);
666 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
667 SLP_TREE_VEC_STMTS (left_node) = NULL;
668 SLP_TREE_LEFT (left_node) = NULL;
669 SLP_TREE_RIGHT (left_node) = NULL;
670 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
671 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
672 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
673 inside_cost, outside_cost, ncopies_for_cost,
674 max_nunits, load_permutation, loads,
675 vectorization_factor))
676 return false;
678 SLP_TREE_LEFT (*node) = left_node;
681 if (first_stmt_dt1 == vect_internal_def)
683 slp_tree right_node = XNEW (struct _slp_tree);
684 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
685 SLP_TREE_VEC_STMTS (right_node) = NULL;
686 SLP_TREE_LEFT (right_node) = NULL;
687 SLP_TREE_RIGHT (right_node) = NULL;
688 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
689 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
690 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
691 inside_cost, outside_cost, ncopies_for_cost,
692 max_nunits, load_permutation, loads,
693 vectorization_factor))
694 return false;
696 SLP_TREE_RIGHT (*node) = right_node;
699 return true;
703 static void
704 vect_print_slp_tree (slp_tree node)
706 int i;
707 gimple stmt;
709 if (!node)
710 return;
712 fprintf (vect_dump, "node ");
713 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
715 fprintf (vect_dump, "\n\tstmt %d ", i);
716 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
718 fprintf (vect_dump, "\n");
720 vect_print_slp_tree (SLP_TREE_LEFT (node));
721 vect_print_slp_tree (SLP_TREE_RIGHT (node));
725 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
726 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
727 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
728 stmts in NODE are to be marked. */
730 static void
731 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
733 int i;
734 gimple stmt;
736 if (!node)
737 return;
739 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
740 if (j < 0 || i == j)
741 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
743 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
744 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
748 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
750 static void
751 vect_mark_slp_stmts_relevant (slp_tree node)
753 int i;
754 gimple stmt;
755 stmt_vec_info stmt_info;
757 if (!node)
758 return;
760 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
762 stmt_info = vinfo_for_stmt (stmt);
763 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
764 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
765 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
768 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
769 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
773 /* Check if the permutation required by the SLP INSTANCE is supported.
774 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
776 static bool
777 vect_supported_slp_permutation_p (slp_instance instance)
779 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
780 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
781 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
782 VEC (slp_tree, heap) *sorted_loads = NULL;
783 int index;
784 slp_tree *tmp_loads = NULL;
785 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
786 slp_tree load;
788 /* FORNOW: The only supported load permutation is the one in which all
789 the loads in a node access the same location, when the data-refs in
790 nodes of LOADS constitute an interleaving chain.
791 Sort the nodes according to the order of accesses in the chain. */
792 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
793 for (i = 0, j = 0;
794 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
795 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
796 i += group_size, j++)
798 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
799 /* Check that the loads are all in the same interleaving chain. */
800 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
802 if (vect_print_dump_info (REPORT_DETAILS))
804 fprintf (vect_dump, "Build SLP failed: unsupported data "
805 "permutation ");
806 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
809 free (tmp_loads);
810 return false;
813 tmp_loads[index] = load;
816 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
817 for (i = 0; i < group_size; i++)
818 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
820 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
821 SLP_INSTANCE_LOADS (instance) = sorted_loads;
822 free (tmp_loads);
824 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
825 SLP_INSTANCE_UNROLLING_FACTOR (instance),
826 instance, true))
827 return false;
829 return true;
833 /* Rearrange the statements of NODE according to PERMUTATION. */
835 static void
836 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
837 VEC (int, heap) *permutation)
839 gimple stmt;
840 VEC (gimple, heap) *tmp_stmts;
841 unsigned int index, i;
843 if (!node)
844 return;
846 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
847 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
849 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
850 tmp_stmts = VEC_alloc (gimple, heap, group_size);
852 for (i = 0; i < group_size; i++)
853 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
855 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
857 index = VEC_index (int, permutation, i);
858 VEC_replace (gimple, tmp_stmts, index, stmt);
861 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
862 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
866 /* Check if the required load permutation is supported.
867 LOAD_PERMUTATION contains a list of indices of the loads.
868 In SLP this permutation is relative to the order of strided stores that are
869 the base of the SLP instance. */
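/* For illustration, with GROUP_SIZE == 2 and two load nodes, the
   permutations {0 0 1 1} and {1 1 0 0} are supported below: each node
   reads a single place in the chain.  {0 1 0 1} is not, since the stmts
   of one node would read different places.  */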
871 static bool
872 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
873 VEC (int, heap) *load_permutation)
875 int i = 0, j, prev = -1, next, k, number_of_groups;
876 bool supported, bad_permutation = false;
877 sbitmap load_index;
878 slp_tree node, other_complex_node;
879 gimple stmt, first = NULL, other_node_first;
880 unsigned complex_numbers = 0;
882 /* FORNOW: permutations are only supported in SLP. */
883 if (!slp_instn)
884 return false;
886 if (vect_print_dump_info (REPORT_SLP))
888 fprintf (vect_dump, "Load permutation ");
889 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
890 fprintf (vect_dump, "%d ", next);
893 /* In case of reduction every load permutation is allowed, since the order
894 of the reduction statements is not important (as opposed to the case of
895 strided stores). The only condition we need to check is that all the
896 load nodes are of the same size and have the same permutation (and then
897 rearrange all the nodes of the SLP instance according to this
898 permutation). */
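/* E.g., for a sum reduction with GROUP_SIZE == 2, the permutation
   {1 0 1 0} is accepted: both sequences are identical, and the stmts of
   the nodes are simply rearranged to match them.  */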
900 /* Check that all the load nodes are of the same size. */
901 for (i = 0;
902 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
903 i++)
905 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
906 != (unsigned) group_size)
907 return false;
909 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
910 if (is_gimple_assign (stmt)
911 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
912 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
913 complex_numbers++;
916 /* Complex operands can be swapped as follows:
917 real_c = real_b + real_a;
918 imag_c = imag_a + imag_b;
919 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
920 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
921 chains are mixed, they match the above pattern. */
922 if (complex_numbers)
924 for (i = 0;
925 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
926 i++)
928 for (j = 0;
929 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt);
930 j++)
932 if (j == 0)
933 first = stmt;
934 else
936 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
938 if (complex_numbers != 2)
939 return false;
941 if (i == 0)
942 k = 1;
943 else
944 k = 0;
946 other_complex_node = VEC_index (slp_tree,
947 SLP_INSTANCE_LOADS (slp_instn), k);
948 other_node_first = VEC_index (gimple,
949 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
951 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))
952 != other_node_first)
953 return false;
960 /* We checked that this case is OK, so there is no need to proceed with
961 permutation tests. */
962 if (complex_numbers == 2)
964 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
965 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
966 return true;
969 node = SLP_INSTANCE_TREE (slp_instn);
970 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
971 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
972 instance; not all the loads belong to the same node or interleaving
973 group. Hence, we need to divide them into groups according to
974 GROUP_SIZE. */
975 number_of_groups = VEC_length (int, load_permutation) / group_size;
977 /* Reduction (there are no data-refs in the root). */
978 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
980 int first_group_load_index;
982 /* Compare all the permutation sequences to the first one. */
983 for (i = 1; i < number_of_groups; i++)
985 k = 0;
986 for (j = i * group_size; j < i * group_size + group_size; j++)
988 next = VEC_index (int, load_permutation, j);
989 first_group_load_index = VEC_index (int, load_permutation, k);
991 if (next != first_group_load_index)
993 bad_permutation = true;
994 break;
997 k++;
1000 if (bad_permutation)
1001 break;
1004 if (!bad_permutation)
1006 /* This permutation is valid for reduction. Since the order of the
1007 statements in the nodes is not important unless they are memory
1008 accesses, we can rearrange the statements in all the nodes
1009 according to the order of the loads. */
1010 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1011 load_permutation);
1012 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1013 return true;
1017 /* FORNOW: the only supported permutation is 0..01..1.., i.e., GROUP_SIZE
1018 sequences of GROUP_SIZE identical indices each, together covering all
1019 the loads (unless it's a reduction). */
1020 if (VEC_length (int, load_permutation)
1021 != (unsigned int) (group_size * group_size))
1022 return false;
1024 supported = true;
1025 load_index = sbitmap_alloc (group_size);
1026 sbitmap_zero (load_index);
1027 for (j = 0; j < group_size; j++)
1029 for (i = j * group_size, k = 0;
1030 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1031 i++, k++)
1033 if (i != j * group_size && next != prev)
1035 supported = false;
1036 break;
1039 prev = next;
1042 if (TEST_BIT (load_index, prev))
1044 supported = false;
1045 break;
1048 SET_BIT (load_index, prev);
1051 for (j = 0; j < group_size; j++)
1052 if (!TEST_BIT (load_index, j))
1053 return false;
1055 sbitmap_free (load_index);
1057 if (supported && i == group_size * group_size
1058 && vect_supported_slp_permutation_p (slp_instn))
1059 return true;
1061 return false;
1065 /* Find the first load in the loop that belongs to INSTANCE.
1066 When loads are in several SLP nodes, there can be a case in which the first
1067 load does not appear in the first SLP node to be transformed, causing
1068 incorrect order of statements. Since we generate all the loads together,
1069 they must be inserted before the first load of the SLP instance and not
1070 before the first load of the first node of the instance. */
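/* E.g., if the loads of the instance's two nodes interleave in the
   basic block, the earliest load may belong to the node that is
   transformed second; hence we search all the nodes for it.  */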
1071 static gimple
1072 vect_find_first_load_in_slp_instance (slp_instance instance)
1074 int i, j;
1075 slp_tree load_node;
1076 gimple first_load = NULL, load;
1078 for (i = 0;
1079 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
1080 i++)
1081 for (j = 0;
1082 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1083 j++)
1084 first_load = get_earlier_stmt (load, first_load);
1086 return first_load;
1090 /* Analyze an SLP instance starting from a group of strided stores. Call
1091 vect_build_slp_tree to build a tree of packed stmts if possible.
1092 Return FALSE if it's impossible to SLP any stmt in the loop. */
1094 static bool
1095 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1096 gimple stmt)
1098 slp_instance new_instance;
1099 slp_tree node = XNEW (struct _slp_tree);
1100 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1101 unsigned int unrolling_factor = 1, nunits;
1102 tree vectype, scalar_type = NULL_TREE;
1103 gimple next;
1104 unsigned int vectorization_factor = 0;
1105 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1106 unsigned int max_nunits = 0;
1107 VEC (int, heap) *load_permutation;
1108 VEC (slp_tree, heap) *loads;
1109 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1111 if (dr)
1113 scalar_type = TREE_TYPE (DR_REF (dr));
1114 vectype = get_vectype_for_scalar_type (scalar_type);
1115 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1117 else
1119 gcc_assert (loop_vinfo);
1120 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1121 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1124 if (!vectype)
1126 if (vect_print_dump_info (REPORT_SLP))
1128 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1129 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1132 return false;
1135 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1136 if (loop_vinfo)
1137 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1138 else
1139 /* No multitypes in BB SLP. */
1140 vectorization_factor = nunits;
1142 /* Calculate the unrolling factor. */
1143 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1144 if (unrolling_factor != 1 && !loop_vinfo)
1146 if (vect_print_dump_info (REPORT_SLP))
1147 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1148 " block SLP");
1150 return false;
1153 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1154 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1155 next = stmt;
1156 if (dr)
1158 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1159 while (next)
1161 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1162 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1165 else
1167 /* Collect reduction statements. */
1168 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1169 next);
1170 i++)
1172 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1173 if (vect_print_dump_info (REPORT_DETAILS))
1175 fprintf (vect_dump, "pushing reduction into node: ");
1176 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
1181 SLP_TREE_VEC_STMTS (node) = NULL;
1182 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1183 SLP_TREE_LEFT (node) = NULL;
1184 SLP_TREE_RIGHT (node) = NULL;
1185 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1186 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1188 /* Calculate the number of vector stmts to create based on the unrolling
1189 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1190 GROUP_SIZE / NUNITS otherwise). */
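/* E.g., for GROUP_SIZE == 8, NUNITS == 4 and UNROLLING_FACTOR == 1 this
   gives NCOPIES_FOR_COST == 2: two vector stmts cover the group.  */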
1191 ncopies_for_cost = unrolling_factor * group_size / nunits;
1193 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1194 loads = VEC_alloc (slp_tree, heap, group_size);
1196 /* Build the tree for the SLP instance. */
1197 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1198 &inside_cost, &outside_cost, ncopies_for_cost,
1199 &max_nunits, &load_permutation, &loads,
1200 vectorization_factor))
1202 /* Create a new SLP instance. */
1203 new_instance = XNEW (struct _slp_instance);
1204 SLP_INSTANCE_TREE (new_instance) = node;
1205 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1206 /* Calculate the unrolling factor based on the smallest type in the
1207 loop. */
1208 if (max_nunits > nunits)
1209 unrolling_factor = least_common_multiple (max_nunits, group_size)
1210 / group_size;
1212 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1213 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1214 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1215 SLP_INSTANCE_LOADS (new_instance) = loads;
1216 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1217 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1218 if (VEC_length (slp_tree, loads))
1220 if (!vect_supported_load_permutation_p (new_instance, group_size,
1221 load_permutation))
1223 if (vect_print_dump_info (REPORT_SLP))
1225 fprintf (vect_dump, "Build SLP failed: unsupported load "
1226 "permutation ");
1227 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1230 vect_free_slp_instance (new_instance);
1231 return false;
1234 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1235 = vect_find_first_load_in_slp_instance (new_instance);
1237 else
1238 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1240 if (loop_vinfo)
1241 VEC_safe_push (slp_instance, heap,
1242 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1243 new_instance);
1244 else
1245 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1246 new_instance);
1248 if (vect_print_dump_info (REPORT_SLP))
1249 vect_print_slp_tree (node);
1251 return true;
1254 /* Failed to SLP. */
1255 /* Free the allocated memory. */
1256 vect_free_slp_tree (node);
1257 VEC_free (int, heap, load_permutation);
1258 VEC_free (slp_tree, heap, loads);
1260 return false;
1264 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
1265 trees of packed scalar stmts if SLP is possible. */
1267 bool
1268 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1270 unsigned int i;
1271 VEC (gimple, heap) *strided_stores, *reductions = NULL;
1272 gimple store;
1273 bool ok = false;
1275 if (vect_print_dump_info (REPORT_SLP))
1276 fprintf (vect_dump, "=== vect_analyze_slp ===");
1278 if (loop_vinfo)
1280 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1281 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1283 else
1284 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1286 /* Find SLP sequences starting from groups of strided stores. */
1287 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1288 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1289 ok = true;
1291 if (bb_vinfo && !ok)
1293 if (vect_print_dump_info (REPORT_SLP))
1294 fprintf (vect_dump, "Failed to SLP the basic block.");
1296 return false;
1299 /* Find SLP sequences starting from groups of reductions. */
1300 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1301 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1302 VEC_index (gimple, reductions, 0)))
1303 ok = true;
1305 return true;
1309 /* For each possible SLP instance decide whether to SLP it and calculate
1310 the overall unrolling factor needed to SLP the loop. */
1312 void
1313 vect_make_slp_decision (loop_vec_info loop_vinfo)
1315 unsigned int i, unrolling_factor = 1;
1316 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1317 slp_instance instance;
1318 int decided_to_slp = 0;
1320 if (vect_print_dump_info (REPORT_SLP))
1321 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1323 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1325 /* FORNOW: SLP if you can. */
1326 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1327 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1329 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1330 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1331 loop-based vectorization. Such stmts will be marked as HYBRID. */
1332 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1333 decided_to_slp++;
1336 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1338 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1339 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1340 decided_to_slp, unrolling_factor);
1344 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1345 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1347 static void
1348 vect_detect_hybrid_slp_stmts (slp_tree node)
1350 int i;
1351 gimple stmt;
1352 imm_use_iterator imm_iter;
1353 gimple use_stmt;
1354 stmt_vec_info stmt_vinfo;
1356 if (!node)
1357 return;
1359 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1360 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1361 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1362 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1363 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1364 && !STMT_SLP_TYPE (stmt_vinfo)
1365 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1366 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1367 && !(gimple_code (use_stmt) == GIMPLE_PHI
1368 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1369 == vect_reduction_def))
1370 vect_mark_slp_stmts (node, hybrid, i);
1372 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1373 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1377 /* Find stmts that must be both vectorized and SLPed. */
1379 void
1380 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1382 unsigned int i;
1383 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1384 slp_instance instance;
1386 if (vect_print_dump_info (REPORT_SLP))
1387 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1389 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1390 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1394 /* Create and initialize a new bb_vec_info struct for BB, as well as
1395 stmt_vec_info structs for all the stmts in it. */
1397 static bb_vec_info
1398 new_bb_vec_info (basic_block bb)
1400 bb_vec_info res = NULL;
1401 gimple_stmt_iterator gsi;
1403 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1404 BB_VINFO_BB (res) = bb;
1406 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1408 gimple stmt = gsi_stmt (gsi);
1409 gimple_set_uid (stmt, 0);
1410 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1413 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1414 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1416 bb->aux = res;
1417 return res;
1421 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1422 stmts in the basic block. */
1424 static void
1425 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1427 basic_block bb;
1428 gimple_stmt_iterator si;
1430 if (!bb_vinfo)
1431 return;
1433 bb = BB_VINFO_BB (bb_vinfo);
1435 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1437 gimple stmt = gsi_stmt (si);
1438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1440 if (stmt_info)
1441 /* Free stmt_vec_info. */
1442 free_stmt_vec_info (stmt);
1445 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1446 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1447 free (bb_vinfo);
1448 bb->aux = NULL;
1452 /* Analyze statements contained in SLP tree node after recursively analyzing
1453 the subtree. Return TRUE if the operations are supported. */
1455 static bool
1456 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1458 bool dummy;
1459 int i;
1460 gimple stmt;
1462 if (!node)
1463 return true;
1465 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1466 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1467 return false;
1469 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1471 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1472 gcc_assert (stmt_info);
1473 gcc_assert (PURE_SLP_STMT (stmt_info));
1475 if (!vect_analyze_stmt (stmt, &dummy, node))
1476 return false;
1479 return true;
1483 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1484 operations are supported. */
1486 static bool
1487 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1489 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1490 slp_instance instance;
1491 int i;
1493 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1495 if (!vect_slp_analyze_node_operations (bb_vinfo,
1496 SLP_INSTANCE_TREE (instance)))
1498 vect_free_slp_instance (instance);
1499 VEC_ordered_remove (slp_instance, slp_instances, i);
1501 else
1502 i++;
1505 if (!VEC_length (slp_instance, slp_instances))
1506 return false;
1508 return true;
1512 /* Check if the basic block can be vectorized. */
1514 bb_vec_info
1515 vect_slp_analyze_bb (basic_block bb)
1517 bb_vec_info bb_vinfo;
1518 VEC (ddr_p, heap) *ddrs;
1519 VEC (slp_instance, heap) *slp_instances;
1520 slp_instance instance;
1521 int i, insns = 0;
1522 gimple_stmt_iterator gsi;
1523 int min_vf = 2;
1524 int max_vf = MAX_VECTORIZATION_FACTOR;
1526 if (vect_print_dump_info (REPORT_DETAILS))
1527 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1529 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1531 gimple stmt = gsi_stmt (gsi);
1532 if (!is_gimple_debug (stmt)
1533 && !gimple_nop_p (stmt)
1534 && gimple_code (stmt) != GIMPLE_LABEL)
1535 insns++;
1538 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1540 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1541 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1542 "block.\n");
1544 return NULL;
1547 bb_vinfo = new_bb_vec_info (bb);
1548 if (!bb_vinfo)
1549 return NULL;
1551 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1554 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1555 "block.\n");
1557 destroy_bb_vec_info (bb_vinfo);
1558 return NULL;
1561 ddrs = BB_VINFO_DDRS (bb_vinfo);
1562 if (!VEC_length (ddr_p, ddrs))
1564 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1565 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1566 "block.\n");
1568 destroy_bb_vec_info (bb_vinfo);
1569 return NULL;
1572 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1573 || min_vf > max_vf)
1575 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1576 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1577 "in basic block.\n");
1579 destroy_bb_vec_info (bb_vinfo);
1580 return NULL;
1583 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1585 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1586 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1587 "block.\n");
1589 destroy_bb_vec_info (bb_vinfo);
1590 return NULL;
1593 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1595 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1596 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1597 "block.\n");
1599 destroy_bb_vec_info (bb_vinfo);
1600 return NULL;
1603 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1605 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1606 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1607 "block.\n");
1609 destroy_bb_vec_info (bb_vinfo);
1610 return NULL;
1613 /* Check the SLP opportunities in the basic block, analyze and build SLP
1614 trees. */
1615 if (!vect_analyze_slp (NULL, bb_vinfo))
1617 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1618 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1619 "in basic block.\n");
1621 destroy_bb_vec_info (bb_vinfo);
1622 return NULL;
1625 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1627 /* Mark all the statements that we want to vectorize as pure SLP and
1628 relevant. */
1629 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1631 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1632 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1635 if (!vect_slp_analyze_operations (bb_vinfo))
1637 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1638 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1640 destroy_bb_vec_info (bb_vinfo);
1641 return NULL;
1644 if (vect_print_dump_info (REPORT_DETAILS))
1645 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1647 return bb_vinfo;
1651 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1652 the number of created vector stmts depends on the unrolling factor). However,
1653 the actual number of vector stmts for every SLP node depends on VF which is
1654 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1655 In this function we assume that the inside costs calculated in
1656 vect_model_xxx_cost are linear in ncopies. */
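/* E.g., an instance with unrolling factor 2 in a loop with VF == 8 needs
   four times the vector stmts, so its inside cost is multiplied by
   8 / 2 == 4 below.  */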
1658 void
1659 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1661 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1662 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1663 slp_instance instance;
1665 if (vect_print_dump_info (REPORT_SLP))
1666 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1668 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1669 /* We assume that costs are linear in ncopies. */
1670 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1671 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1675 /* For constant and loop invariant defs of SLP_NODE this function returns
1676 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1677 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1678 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1679 REDUC_INDEX is the index of the reduction operand in the statements, unless
1680 it is -1. */
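/* For illustration, for the group of stores

     a[0] = 3;
     a[1] = 7;

   with NUNITS == 4 this creates the constant vector {3, 7, 3, 7} (see
   the explanation of NUMBER_OF_COPIES below).  */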
1682 static void
1683 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1684 unsigned int op_num, unsigned int number_of_vectors,
1685 int reduc_index)
1687 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1688 gimple stmt = VEC_index (gimple, stmts, 0);
1689 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1690 int nunits;
1691 tree vec_cst;
1692 tree t = NULL_TREE;
1693 int j, number_of_places_left_in_vector;
1694 tree vector_type;
1695 tree op, vop;
1696 int group_size = VEC_length (gimple, stmts);
1697 unsigned int vec_num, i;
1698 int number_of_copies = 1;
1699 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1700 bool constant_p, is_store;
1701 tree neutral_op = NULL;
1703 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1705 enum tree_code code = gimple_assign_rhs_code (stmt);
1706 if (reduc_index == -1)
1708 VEC_free (tree, heap, *vec_oprnds);
1709 return;
1712 op_num = reduc_index - 1;
1713 op = gimple_op (stmt, op_num + 1);
1714 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1715 we need either neutral operands or the original operands. See
1716 get_initial_def_for_reduction() for details. */
1717 switch (code)
1719 case WIDEN_SUM_EXPR:
1720 case DOT_PROD_EXPR:
1721 case PLUS_EXPR:
1722 case MINUS_EXPR:
1723 case BIT_IOR_EXPR:
1724 case BIT_XOR_EXPR:
1725 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1726 neutral_op = build_real (TREE_TYPE (op), dconst0);
1727 else
1728 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1730 break;
1732 case MULT_EXPR:
1733 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1734 neutral_op = build_real (TREE_TYPE (op), dconst1);
1735 else
1736 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1738 break;
1740 case BIT_AND_EXPR:
1741 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1742 break;
1744 default:
1745 neutral_op = NULL;
1749 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1751 is_store = true;
1752 op = gimple_assign_rhs1 (stmt);
1754 else
1756 is_store = false;
1757 op = gimple_op (stmt, op_num + 1);
1760 if (CONSTANT_CLASS_P (op))
1761 constant_p = true;
1762 else
1763 constant_p = false;
1765 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1766 gcc_assert (vector_type);
1768 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1770 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1771 created vectors. It is greater than 1 if unrolling is performed.
1773 For example, we have two scalar operands, s1 and s2 (e.g., group of
1774 strided accesses of size two), while NUNITS is four (i.e., four scalars
1775 of this type can be packed in a vector). The output vector will contain
1776 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1777 will be 2).
1779 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1780 containing the operands.
1782 For example, NUNITS is four as before, and the group size is 8
1783 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1784 {s5, s6, s7, s8}. */
1786 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1788 number_of_places_left_in_vector = nunits;
1789 for (j = 0; j < number_of_copies; j++)
1791 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1793 if (is_store)
1794 op = gimple_assign_rhs1 (stmt);
1795 else
1796 op = gimple_op (stmt, op_num + 1);
1798 if (reduc_index != -1)
1800 struct loop *loop = (gimple_bb (stmt))->loop_father;
1801 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1803 gcc_assert (loop);
1804 /* Get the def before the loop. */
1805 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1806 loop_preheader_edge (loop));
1807 if (j != (number_of_copies - 1) && neutral_op)
1808 op = neutral_op;
1811 /* Create 'vect_ = {op0,op1,...,opn}'. */
1812 t = tree_cons (NULL_TREE, op, t);
1814 number_of_places_left_in_vector--;
1816 if (number_of_places_left_in_vector == 0)
1818 number_of_places_left_in_vector = nunits;
1820 if (constant_p)
1821 vec_cst = build_vector (vector_type, t);
1822 else
1823 vec_cst = build_constructor_from_list (vector_type, t);
1824 VEC_quick_push (tree, voprnds,
1825 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1826 t = NULL_TREE;
1831 /* Since the vectors are created in reverse order, we should reverse
1832 them. */
1833 vec_num = VEC_length (tree, voprnds);
1834 for (j = vec_num - 1; j >= 0; j--)
1836 vop = VEC_index (tree, voprnds, j);
1837 VEC_quick_push (tree, *vec_oprnds, vop);
1840 VEC_free (tree, heap, voprnds);
1842 /* If VF is greater than the unrolling factor needed for the SLP
1843 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1844 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1845 to replicate the vectors. */
1846 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1848 tree neutral_vec = NULL;
1850 if (neutral_op)
1852 if (!neutral_vec)
1854 t = NULL;
1855 for (i = 0; i < (unsigned) nunits; i++)
1856 t = tree_cons (NULL_TREE, neutral_op, t);
1857 neutral_vec = build_vector (vector_type, t);
1860 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
1862 else
1864 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1865 VEC_quick_push (tree, *vec_oprnds, vop);
1871 /* Get the vectorized definitions from SLP_NODE, which contains the
1872 corresponding vectorized def-stmts. */
1874 static void
1875 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1877 tree vec_oprnd;
1878 gimple vec_def_stmt;
1879 unsigned int i;
1881 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1883 for (i = 0;
1884 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1885 i++)
1887 gcc_assert (vec_def_stmt);
1888 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1889 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1894 /* Get vectorized definitions for SLP_NODE.
1895 If the scalar definitions are loop invariants or constants, collect them and
1896 call vect_get_constant_vectors() to create vector stmts.
1897 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1898 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1899 vect_get_slp_vect_defs() to retrieve them.
1900 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1901 the right node). This is used when the second operand must remain scalar. */
1903 void
1904 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1905 VEC (tree,heap) **vec_oprnds1, int reduc_index)
1907 gimple first_stmt;
1908 enum tree_code code;
1909 int number_of_vects;
1910 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1912 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1913 /* The number of vector defs is determined by the number of vector statements
1914 in the node from which we get those statements. */
1915 if (SLP_TREE_LEFT (slp_node))
1916 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1917 else
1919 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1920 /* Number of vector stmts was calculated according to LHS in
1921 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1922 necessary. See vect_get_smallest_scalar_type() for details. */
1923 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1924 &rhs_size_unit);
1925 if (rhs_size_unit != lhs_size_unit)
1927 number_of_vects *= rhs_size_unit;
1928 number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors (), since we are
     looking for the initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need the initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}


/* Create NCOPIES permutation statements using the mask MASK (by building
   a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy,
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
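
  /* An illustrative example (numbers not from the original sources): with
     4 vector stmts in NODE and NCOPIES == 2, STRIDE is 2; copy 0 fills slot
     VECT_STMTS_COUNTER from DR_CHAIN[FIRST_VEC_INDX]/[SECOND_VEC_INDX], and
     copy 1 fills slot VECT_STMTS_COUNTER + 2 from the vectors STRIDE
     positions further down DR_CHAIN.  */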

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}


/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for the second and third
     vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
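  /* E.g. (an illustrative trace, not from the original comments): with
     mask_nunits == 4, once the first vector has been dropped
     (number_of_mask_fixes == 2), a raw element of 9 is adjusted here to
     9 - 4 * 1 == 5, as in the {6,9,9,9} -> {2,5,5,5} conversion described
     before vect_transform_slp_perm_load () below.  */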

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values
     in the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to
             the next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working
         with the second and the third - we need to adjust the values of
         the mask accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */
bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance,
                              bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
  orig_vec_stmts_num = group_size
    * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
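
  /* For instance (illustrative numbers only): in the three-load example in
     the comment below, group_size == 3, nunits == 4 and an SLP unrolling
     factor of 4 give orig_vec_stmts_num == 3, and with vf == 4 a single
     copy (ncopies == 1) is generated.  SCALE accounts for masks expressed
     in units smaller than the data elements, e.g., a V16QI byte mask
     permuting V4SI vectors gives scale == 4.  */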

  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in the
     target specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
                    i, node);
       i++)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
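              /* E.g. (illustrative, matching the comment above): for the
                 a's node (i == 0) with group_size == 3 and scale == 1,
                 each j contributes GROUP_SIZE copies of element j * 3,
                 producing 0,0,0, 3,3,3, 6,6,6, 9,9,9 - exactly the
                 {0,0,0,3} {3,3,6,6} {6,9,9,9} masks above.  */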
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                              mask_nunits, only_one_vec,
                                              index, mask,
                                              &current_mask_element,
                                              &need_next_vector))
                    {
                      /* Don't leak the mask buffer on failure.  */
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                          SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                          mask_vec, first_vec_index, second_vec_index,
                          gsi, node, builtin_decl, vectype, dr_chain,
                          ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}


/* Vectorize the SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be
     created for the scalar stmts in each node of the SLP tree.  The number
     of vector elements in one vector iteration is the number of scalar
     elements in one scalar iteration (GROUP_SIZE) multiplied by VF divided
     by the vector size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
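  /* E.g. (illustrative numbers only): a group of two scalar stmts with
     VF == 4 and V4SI vectors (nunits == 4) gives
     vec_stmts_size == (4 * 2) / 4 == 2 vector stmts per node.  */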

  /* In case of load permutation we have to allocate vectorized statements
     for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i,
                        loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}


/* Generate vectorized code for all the SLP instances in the loop
   (LOOP_VINFO) or in the basic block (BB_VINFO), and remove the scalar
   stores that have become dead.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      /* There is no unrolling when vectorizing a single basic block.  */
      vf = 1;
    }

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  /* Remove the scalar stores of each vectorized group - they are dead now
     that their vectorized replacements have been emitted.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}