/* gcc/tree-vect-slp.c */
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
/* Extract the location of the basic block in the source code.
   Return the location of the first statement in BB that has one,
   and UNKNOWN_LOC if none is found or BB is NULL.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  free (instance);
}
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
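/* For example, for the SLP group

     a[0] = b[0] + x;
     a[1] = b[1] + x;

   operand 0 of each stmt is defined by a load from B (vect_internal_def) and
   operand 1 is the loop invariant X (vect_external_def).  The defs of the
   second stmt match the def types recorded for the first stmt, so the group
   is accepted; had the second stmt used a constant instead of X, the checks
   below would reject the group because the def types differ.  */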
static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not memory operation (we don't call this function for loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
                 the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
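/* For example, for the basic block

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   the root node built here holds the two stores, its left child holds the
   two loads from B and its right child the two loads from C (the children
   are built from DEF_STMTS0 and DEF_STMTS1 below), and the recursion stops
   when a group of loads is reached.  */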
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || (optab->handlers[(int) vec_mode].insn_code
                      == CODE_FOR_nothing))
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
/* Dump the SLP tree rooted at NODE and, recursively, its children (for
   debugging).  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */
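/* For example, if GROUP_SIZE is 3 and the first scalar stmts of the three
   load nodes are at positions 2, 0 and 1 of their interleaving chain, the
   nodes are placed in TMP_LOADS[2], TMP_LOADS[0] and TMP_LOADS[1]
   respectively, and SLP_INSTANCE_LOADS is then rebuilt in chain order.  */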
static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
/* Rearrange the statements of NODE according to PERMUTATION.  */
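/* For example, with GROUP_SIZE == 3 and PERMUTATION == {2, 0, 1}, stmt 0
   moves to slot 2, stmt 1 to slot 0 and stmt 2 to slot 1 in every node of
   the tree.  */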
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
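/* For example, for GROUP_SIZE == 2 the permutation {0, 0, 1, 1} is
   supported: the first load node reads element 0 of the interleaving chain
   in both its stmts and the second node reads element 1.  A permutation
   such as {0, 1, 0, 1} is rejected because the stmts of one node access
   different elements of the chain.  */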
static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node;
  gimple stmt;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
       i++)
    if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
        != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
       i++)
    for (j = 0;
         VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
         j++)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (dr)
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (dr)
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "pushing reduction into node: ");
              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
            }
        }
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
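  /* For example, with NUNITS == 4 and a store group of size 6,
     UNROLLING_FACTOR == least_common_multiple (4, 6) / 6 == 2 and
     NCOPIES_FOR_COST == 2 * 6 / 4 == 3.  */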
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
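/* For example, if the result of an addition that was marked PURE_SLP is
   also used by a stmt that will only be vectorized by loop-based
   vectorization, the addition must be vectorized both ways and is
   re-marked as HYBRID here.  */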
static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}
/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}
/* Check if the basic block can be vectorized.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).  However,
   the actual number of vector stmts for every SLP node depends on VF which is
   set later in vect_analyze_operations().  Hence, SLP costs should be updated.
   In this function we assume that the inside costs calculated in
   vect_model_xxx_cost are linear in ncopies.  */
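/* For example, if VF == 8 and an instance was built with unrolling factor 4,
   the inside-of-loop cost of the instance is doubled below, since each of its
   SLP nodes will be instantiated twice per loop iteration.  */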
void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
   stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */
static void
vect_get_constant_vectors (slp_tree slp_node, VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      op_num = reduc_index - 1;
      op = gimple_op (stmt, op_num + 1);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
        case WIDEN_SUM_EXPR:
        case DOT_PROD_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
            neutral_op = build_real (TREE_TYPE (op), dconst0);
          else
            neutral_op = build_int_cst (TREE_TYPE (op), 0);

          break;

        case MULT_EXPR:
        case BIT_AND_EXPR:
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
            neutral_op = build_real (TREE_TYPE (op), dconst1);
          else
            neutral_op = build_int_cst (TREE_TYPE (op), 1);

          break;

        default:
          neutral_op = NULL;
        }
    }
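  /* For example, for a PLUS_EXPR reduction NEUTRAL_OP is 0 and for a
     MULT_EXPR reduction it is 1, so the extra copies of the initial vector
     that unrolling requires can be filled with the neutral value without
     changing the result of the reduction.  */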
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree, heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
       i++)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain scalar.  */
void
vect_get_slp_defs (slp_tree slp_node, VEC (tree, heap) **vec_oprnds0,
                   VEC (tree, heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type() for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }
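  /* For example, if the group stores chars while the rhs values are computed
     in ints (LHS_SIZE_UNIT == 1, RHS_SIZE_UNIT == 4), four times as many
     vector defs are needed as the vector stmts counted from the LHS.  */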
  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector statements
     in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed in
   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
   the created stmts must be inserted.  */
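/* For example, if NODE needs 8 vector stmts and NCOPIES == 2, STRIDE == 4:
   copy 0 permutes DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX],
   and copy 1 the entries 4 positions further on.  */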
1900 static inline void
1901 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1902 tree mask, int first_vec_indx, int second_vec_indx,
1903 gimple_stmt_iterator *gsi, slp_tree node,
1904 tree builtin_decl, tree vectype,
1905 VEC(tree,heap) *dr_chain,
1906 int ncopies, int vect_stmts_counter)
1908 tree perm_dest;
1909 gimple perm_stmt = NULL;
1910 stmt_vec_info next_stmt_info;
1911 int i, stride;
1912 tree first_vec, second_vec, data_ref;
1913 VEC (tree, heap) *params = NULL;
1915 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1917 /* Initialize the vect stmts of NODE to properly insert the generated
1918 stmts later. */
1919 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1920 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1921 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1923 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1924 for (i = 0; i < ncopies; i++)
1926 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1927 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1929 /* Build argument list for the vectorized call. */
1930 VEC_free (tree, heap, params);
1931 params = VEC_alloc (tree, heap, 3);
1932 VEC_quick_push (tree, params, first_vec);
1933 VEC_quick_push (tree, params, second_vec);
1934 VEC_quick_push (tree, params, mask);
1936 /* Generate the permute statement. */
1937 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1938 data_ref = make_ssa_name (perm_dest, perm_stmt);
1939 gimple_call_set_lhs (perm_stmt, data_ref);
1940 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1942 /* Store the vector statement in NODE. */
1943 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1944 stride * i + vect_stmts_counter, perm_stmt);
1946 first_vec_indx += stride;
1947 second_vec_indx += stride;
1950 /* Mark the scalar stmt as vectorized. */
1951 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1952 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
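/* A worked example of the indexing above (illustrative numbers): if
   NODE has 4 vector stmts and NCOPIES == 2, then STRIDE == 2.  Copy 0
   permutes dr_chain[first_vec_indx] with dr_chain[second_vec_indx] and
   lands in slot VECT_STMTS_COUNTER of the node's vec stmts; copy 1
   permutes the vectors STRIDE slots further along DR_CHAIN and lands
   in slot STRIDE + VECT_STMTS_COUNTER.  */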
1956 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
1957 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
1958 representation. Check that the mask is valid and return FALSE if not.
1959 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
1960 the next vector, i.e., the current first vector is no longer needed. */
1962 static bool
1963 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1964 int mask_nunits, bool only_one_vec, int index,
1965 int *mask, int *current_mask_element,
1966 bool *need_next_vector)
1968 int i;
1969 static int number_of_mask_fixes = 1;
1970 static bool mask_fixed = false;
1971 static bool needs_first_vector = false;
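/* Note: these statics carry state across consecutive calls while a
   single permutation mask is being filled; they are reset at the
   bottom of this function once the mask's last element is handled.  */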
1973 /* Convert to target specific representation. */
1974 *current_mask_element = first_mask_element + m;
1975 /* Adjust the value in case it's a mask for the second and third vectors. */
1976 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1978 if (*current_mask_element < mask_nunits)
1979 needs_first_vector = true;
1981 /* We have only one input vector to permute but the mask accesses values in
1982 the next vector as well. */
1983 if (only_one_vec && *current_mask_element >= mask_nunits)
1985 if (vect_print_dump_info (REPORT_DETAILS))
1987 fprintf (vect_dump, "permutation requires at least two vectors ");
1988 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1991 return false;
1994 /* The mask requires the next vector. */
1995 if (*current_mask_element >= mask_nunits * 2)
1997 if (needs_first_vector || mask_fixed)
1999 /* We either need the first vector too or have already moved to the
2000 next vector. In both cases, this permutation needs three
2001 vectors. */
2002 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "permutation requires at "
2005 "least three vectors ");
2006 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2009 return false;
2012 /* We move to the next vector, dropping the first one and working with
2013 the second and the third - we need to adjust the values of the mask
2014 accordingly. */
2015 *current_mask_element -= mask_nunits * number_of_mask_fixes;
2017 for (i = 0; i < index; i++)
2018 mask[i] -= mask_nunits * number_of_mask_fixes;
2020 (number_of_mask_fixes)++;
2021 mask_fixed = true;
2024 *need_next_vector = mask_fixed;
2026 /* This was the last element of this mask. Start a new one. */
2027 if (index == mask_nunits - 1)
2029 number_of_mask_fixes = 1;
2030 mask_fixed = false;
2031 needs_first_vector = false;
2034 return true;
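/* Worked example for the adjustment above (illustrative, with
   MASK_NUNITS == 4): given the mask {6,9,9,9} from the comment below,
   element 9 is >= 2 * MASK_NUNITS, so the first vector is dropped and
   the entries are re-based by MASK_NUNITS: 6 - 4 = 2 and 9 - 4 = 5,
   yielding {2,5,5,5} with NEED_NEXT_VECTOR set to true.  */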
2038 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2039 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2040 permute statements for SLP_NODE_INSTANCE. */
2041 bool
2042 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2043 gimple_stmt_iterator *gsi, int vf,
2044 slp_instance slp_node_instance, bool analyze_only)
2046 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2047 tree mask_element_type = NULL_TREE, mask_type;
2048 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2049 slp_tree node;
2050 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2051 gimple next_scalar_stmt;
2052 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2053 int first_mask_element;
2054 int index, unroll_factor, *mask, current_mask_element, ncopies;
2055 bool only_one_vec = false, need_next_vector = false;
2056 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2058 if (!targetm.vectorize.builtin_vec_perm)
2060 if (vect_print_dump_info (REPORT_DETAILS))
2062 fprintf (vect_dump, "no builtin for vect permute for ");
2063 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2066 return false;
2069 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2070 &mask_element_type);
2071 if (!builtin_decl || !mask_element_type)
2073 if (vect_print_dump_info (REPORT_DETAILS))
2075 fprintf (vect_dump, "no builtin for vect permute for ");
2076 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2079 return false;
2082 mask_type = get_vectype_for_scalar_type (mask_element_type);
2083 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2084 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2085 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2086 scale = mask_nunits / nunits;
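/* E.g. (illustrative of the Altivec case mentioned below): a vperm
   mask is a vector of 16 bytes, so for V4SI data MASK_NUNITS == 16,
   NUNITS == 4 and SCALE == 4, i.e., four mask bytes per data element.
   When the mask has one element per data element, SCALE == 1.  */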
2087 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2089 /* The number of vector stmts to generate based only on the unrolling
2090 factor of SLP_NODE_INSTANCE. */
2091 orig_vec_stmts_num = group_size *
2092 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2093 if (orig_vec_stmts_num == 1)
2094 only_one_vec = true;
2096 /* The number of copies is determined by the final vectorization factor
2097 relative to the SLP_NODE_INSTANCE unrolling factor. */
2098 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
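/* For instance (illustrative numbers): with a final VF of 8 and an
   instance unrolling factor of 4, NCOPIES == 8 / 4 == 2, so every
   permutation below is emitted twice, STRIDE vectors apart.  */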
2100 /* Generate permutation masks for every NODE. Number of masks for each NODE
2101 is equal to GROUP_SIZE.
2102 E.g., we have a group of three nodes with three loads from the same
2103 location in each node, and the vector size is 4. I.e., we have an
2104 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2105 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2106 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2107 for c's: c0c0c0c1 c1c1c2c2 c2c3c3c3
2109 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in a
2110 target-specific type, e.g., in bytes for Altivec).
2111 The last mask is illegal since we assume two operands for the permute
2112 operation, and the mask element values can't be outside that range. Hence,
2113 the last mask must be converted into {2,5,5,5}.
2114 For the first two permutations we need the first and the second input
2115 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2116 we need the second and the third vectors: {b1,c1,a2,b2} and
2117 {c2,a3,b3,c3}. */
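/* A worked check of the example above (same assumed numbers:
   GROUP_SIZE == 3, NUNITS == MASK_NUNITS == 4, SCALE == 1, unrolling
   factor 4): for node i == 0 (the a's) the loop below computes
   first_mask_element = (0 + j * 3) * 1 = 0, 3, 6, 9 for j = 0..3,
   each pushed GROUP_SIZE times, giving the element stream
   0,0,0,3,3,3,6,6,6,9,9,9, which chunks into the masks
   {0,0,0,3} {3,3,6,6} {6,9,9,9} quoted above.  */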
2119 for (i = 0;
2120 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2121 i, node);
2122 i++)
2124 scalar_index = 0;
2125 index = 0;
2126 vect_stmts_counter = 0;
2127 vec_index = 0;
2128 first_vec_index = vec_index++;
2129 if (only_one_vec)
2130 second_vec_index = first_vec_index;
2131 else
2132 second_vec_index = vec_index++;
2134 for (j = 0; j < unroll_factor; j++)
2136 for (k = 0; k < group_size; k++)
2138 first_mask_element = (i + j * group_size) * scale;
2139 for (m = 0; m < scale; m++)
2141 if (!vect_get_mask_element (stmt, first_mask_element, m,
2142 mask_nunits, only_one_vec, index, mask,
2143 &current_mask_element, &need_next_vector))
{
/* Don't leak the mask buffer on failure (the unsupported-permute
   path below frees it as well).  */
free (mask);
2144 return false;
}
2146 mask[index++] = current_mask_element;
2149 if (index == mask_nunits)
2151 tree mask_vec = NULL;
2153 while (--index >= 0)
2155 tree t = build_int_cst (mask_element_type, mask[index]);
2156 mask_vec = tree_cons (NULL, t, mask_vec);
2158 mask_vec = build_vector (mask_type, mask_vec);
2159 index = 0;
2161 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2162 mask_vec))
2164 if (vect_print_dump_info (REPORT_DETAILS))
2166 fprintf (vect_dump, "unsupported vect permute ");
2167 print_generic_expr (vect_dump, mask_vec, 0);
2169 free (mask);
2170 return false;
2173 if (!analyze_only)
2175 if (need_next_vector)
2177 first_vec_index = second_vec_index;
2178 second_vec_index = vec_index;
2181 next_scalar_stmt = VEC_index (gimple,
2182 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2184 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2185 mask_vec, first_vec_index, second_vec_index,
2186 gsi, node, builtin_decl, vectype, dr_chain,
2187 ncopies, vect_stmts_counter++);
2194 free (mask);
2195 return true;
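/* A hypothetical analysis-time use (sketch only; DR_CHAIN and GSI are
   not touched when ANALYZE_ONLY is true):

     if (!vect_transform_slp_perm_load (stmt, NULL, NULL, vf,
                                        slp_node_instance, true))
       ...reject this SLP instance...  */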
2200 /* Vectorize the SLP instance tree in postorder. */
2202 static bool
2203 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2204 unsigned int vectorization_factor)
2206 gimple stmt;
2207 bool strided_store, is_store;
2208 gimple_stmt_iterator si;
2209 stmt_vec_info stmt_info;
2210 unsigned int vec_stmts_size, nunits, group_size;
2211 tree vectype;
2212 int i;
2213 slp_tree loads_node;
2215 if (!node)
2216 return false;
2218 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2219 vectorization_factor);
2220 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2221 vectorization_factor);
2223 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2224 stmt_info = vinfo_for_stmt (stmt);
2226 /* VECTYPE is the type of the destination. */
2227 vectype = STMT_VINFO_VECTYPE (stmt_info);
2228 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2229 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2231 /* For each SLP instance calculate the number of vector stmts to be created
2232 for the scalar stmts in each node of the SLP tree. The number of vector
2233 elements in one vector iteration is the number of scalar elements in
2234 one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
2235 size. */
2236 vec_stmts_size = (vectorization_factor * group_size) / nunits;
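/* E.g. (illustrative numbers): with GROUP_SIZE == 2, VF == 4 and
   NUNITS == 4, the 4 * 2 = 8 scalar stmts covered by one vectorized
   iteration become vec_stmts_size = (4 * 2) / 4 = 2 vector stmts.  */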
2238 /* In case of load permutation we have to allocate vectorized statements for
2239 all the nodes that participate in that permutation. */
2240 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2242 for (i = 0;
2243 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2244 i++)
2246 if (!SLP_TREE_VEC_STMTS (loads_node))
2248 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2249 vec_stmts_size);
2250 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2255 if (!SLP_TREE_VEC_STMTS (node))
2257 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2258 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2261 if (vect_print_dump_info (REPORT_DETAILS))
2263 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2264 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2267 /* Vectorized loads should be inserted before the group's first scalar load. */
2268 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2269 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2270 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2271 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2272 else
2273 si = gsi_for_stmt (stmt);
2275 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2276 return is_store;
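/* Shape of the recursion above (a sketch, not new logic): a plain
   postorder walk, so the operand defs of a node are vectorized before
   the node's own stmts:

     schedule (node):
       schedule (SLP_TREE_LEFT (node));
       schedule (SLP_TREE_RIGHT (node));
       vect_transform_stmt (...) for the node itself;  */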
2280 bool
2281 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2283 VEC (slp_instance, heap) *slp_instances;
2284 slp_instance instance;
2285 unsigned int i, vf;
2286 bool is_store = false;
2288 if (loop_vinfo)
2290 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2291 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2293 else
2295 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2296 vf = 1;
2299 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2301 /* Schedule the tree of INSTANCE. */
2302 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2303 instance, vf);
2304 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2305 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2306 fprintf (vect_dump, "vectorizing stmts using SLP.");
2309 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2311 slp_tree root = SLP_INSTANCE_TREE (instance);
2312 gimple store;
2313 unsigned int j;
2314 gimple_stmt_iterator gsi;
2316 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2317 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2319 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2320 break;
2322 /* Free the attached stmt_vec_info and remove the stmt. */
2323 gsi = gsi_for_stmt (store);
2324 gsi_remove (&gsi, true);
2325 free_stmt_vec_info (store);
2329 return is_store;
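/* Illustrative note on the removal loop above: only the group's scalar
   stores (the SLP root) are deleted here.  E.g., for a group of two
   stores a[0] = x0; a[1] = x1; one vector store now covers both, so
   both scalar stmts and their stmt_vec_infos are released.  */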
2333 /* Vectorize the basic block. */
2335 void
2336 vect_slp_transform_bb (basic_block bb)
2338 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2339 gimple_stmt_iterator si;
2341 gcc_assert (bb_vinfo);
2343 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump, "SLPing BB\n");
2346 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2348 gimple stmt = gsi_stmt (si);
2349 stmt_vec_info stmt_info;
2351 if (vect_print_dump_info (REPORT_DETAILS))
2353 fprintf (vect_dump, "------>SLPing statement: ");
2354 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2357 stmt_info = vinfo_for_stmt (stmt);
2358 gcc_assert (stmt_info);
2360 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2361 if (STMT_SLP_TYPE (stmt_info))
2363 vect_schedule_slp (NULL, bb_vinfo);
2364 break;
2368 mark_sym_for_renaming (gimple_vop (cfun));
2369 /* The memory tags and pointers in vectorized statements need to
2370 have their SSA forms updated. FIXME, why can't this be delayed
2371 until all the loops have been transformed? */
2372 update_ssa (TODO_update_ssa);
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2377 destroy_bb_vec_info (bb_vinfo);