expmed.c (expand_divmod): Always use a comparison for a division by a large unsigned...
[official-gcc.git] / gcc / tree-vect-slp.c
blob058438dc5e988ce50bff95652b20e19f6268d3f0
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
41 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
43 static void
44 vect_free_slp_tree (slp_tree node)
46 if (!node)
47 return;
49 if (SLP_TREE_LEFT (node))
50 vect_free_slp_tree (SLP_TREE_LEFT (node));
52 if (SLP_TREE_RIGHT (node))
53 vect_free_slp_tree (SLP_TREE_RIGHT (node));
55 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
57 if (SLP_TREE_VEC_STMTS (node))
58 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
60 free (node);
64 /* Free the memory allocated for the SLP instance. */
66 void
67 vect_free_slp_instance (slp_instance instance)
69 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
70 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
71 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
75 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
76 they are of a legal type and that they match the defs of the first stmt of
77 the SLP group (stored in FIRST_STMT_...). */
79 static bool
80 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
81 gimple stmt, VEC (gimple, heap) **def_stmts0,
82 VEC (gimple, heap) **def_stmts1,
83 enum vect_def_type *first_stmt_dt0,
84 enum vect_def_type *first_stmt_dt1,
85 tree *first_stmt_def0_type,
86 tree *first_stmt_def1_type,
87 tree *first_stmt_const_oprnd,
88 int ncopies_for_cost,
89 bool *pattern0, bool *pattern1)
91 tree oprnd;
92 unsigned int i, number_of_oprnds;
93 tree def;
94 gimple def_stmt;
95 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
96 stmt_vec_info stmt_info =
97 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
98 enum gimple_rhs_class rhs_class;
99 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
101 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
102 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
104 for (i = 0; i < number_of_oprnds; i++)
106 oprnd = gimple_op (stmt, i + 1);
108 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
109 || (!def_stmt && dt[i] != vect_constant_def))
111 if (vect_print_dump_info (REPORT_SLP))
113 fprintf (vect_dump, "Build SLP failed: can't find def for ");
114 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
117 return false;
120 /* Check if DEF_STMT is a part of a pattern and get the def stmt from
121 the pattern. Check that all the stmts of the node are in the
122 pattern. */
123 if (def_stmt && gimple_bb (def_stmt)
124 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
125 && vinfo_for_stmt (def_stmt)
126 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
128 if (!*first_stmt_dt0)
129 *pattern0 = true;
130 else
132 if (i == 1 && !*first_stmt_dt1)
133 *pattern1 = true;
134 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
136 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "Build SLP failed: some of the stmts"
139 " are in a pattern, and others are not ");
140 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
143 return false;
147 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
148 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
150 if (*dt == vect_unknown_def_type)
152 if (vect_print_dump_info (REPORT_DETAILS))
153 fprintf (vect_dump, "Unsupported pattern.");
154 return false;
157 switch (gimple_code (def_stmt))
159 case GIMPLE_PHI:
160 def = gimple_phi_result (def_stmt);
161 break;
163 case GIMPLE_ASSIGN:
164 def = gimple_assign_lhs (def_stmt);
165 break;
167 default:
168 if (vect_print_dump_info (REPORT_DETAILS))
169 fprintf (vect_dump, "unsupported defining stmt: ");
170 return false;
174 if (!*first_stmt_dt0)
176 /* op0 of the first stmt of the group - store its info. */
177 *first_stmt_dt0 = dt[i];
178 if (def)
179 *first_stmt_def0_type = TREE_TYPE (def);
180 else
181 *first_stmt_const_oprnd = oprnd;
183 /* Analyze costs (for the first stmt of the group only). */
184 if (rhs_class != GIMPLE_SINGLE_RHS)
185 /* Not memory operation (we don't call this functions for loads). */
186 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
187 else
188 /* Store. */
189 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
192 else
194 if (!*first_stmt_dt1 && i == 1)
196 /* op1 of the first stmt of the group - store its info. */
197 *first_stmt_dt1 = dt[i];
198 if (def)
199 *first_stmt_def1_type = TREE_TYPE (def);
200 else
202 /* We assume that the stmt contains only one constant
203 operand. We fail otherwise, to be on the safe side. */
204 if (*first_stmt_const_oprnd)
206 if (vect_print_dump_info (REPORT_SLP))
207 fprintf (vect_dump, "Build SLP failed: two constant "
208 "oprnds in stmt");
209 return false;
211 *first_stmt_const_oprnd = oprnd;
214 else
216 /* Not first stmt of the group, check that the def-stmt/s match
217 the def-stmt/s of the first stmt. */
218 if ((i == 0
219 && (*first_stmt_dt0 != dt[i]
220 || (*first_stmt_def0_type && def
221 && *first_stmt_def0_type != TREE_TYPE (def))))
222 || (i == 1
223 && (*first_stmt_dt1 != dt[i]
224 || (*first_stmt_def1_type && def
225 && *first_stmt_def1_type != TREE_TYPE (def))))
226 || (!def
227 && TREE_TYPE (*first_stmt_const_oprnd)
228 != TREE_TYPE (oprnd)))
230 if (vect_print_dump_info (REPORT_SLP))
231 fprintf (vect_dump, "Build SLP failed: different types ");
233 return false;
238 /* Check the types of the definitions. */
239 switch (dt[i])
241 case vect_constant_def:
242 case vect_invariant_def:
243 break;
245 case vect_loop_def:
246 if (i == 0)
247 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
248 else
249 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
250 break;
252 default:
253 /* FORNOW: Not supported. */
254 if (vect_print_dump_info (REPORT_SLP))
256 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
257 print_generic_expr (vect_dump, def, TDF_SLIM);
260 return false;
264 return true;
268 /* Recursively build an SLP tree starting from NODE.
269 Fail (and return FALSE) if def-stmts are not isomorphic, require data
270 permutation or are of unsupported types of operation. Otherwise, return
271 TRUE. */
273 static bool
274 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
275 unsigned int group_size,
276 int *inside_cost, int *outside_cost,
277 int ncopies_for_cost, unsigned int *max_nunits,
278 VEC (int, heap) **load_permutation,
279 VEC (slp_tree, heap) **loads)
281 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
282 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
283 unsigned int i;
284 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
285 gimple stmt = VEC_index (gimple, stmts, 0);
286 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
287 enum tree_code first_stmt_code = 0, rhs_code;
288 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
289 tree lhs;
290 bool stop_recursion = false, need_same_oprnds = false;
291 tree vectype, scalar_type, first_op1 = NULL_TREE;
292 unsigned int vectorization_factor = 0, ncopies;
293 optab optab;
294 int icode;
295 enum machine_mode optab_op2_mode;
296 enum machine_mode vec_mode;
297 tree first_stmt_const_oprnd = NULL_TREE;
298 struct data_reference *first_dr;
299 bool pattern0 = false, pattern1 = false;
300 HOST_WIDE_INT dummy;
301 bool permutation = false;
302 unsigned int load_place;
303 gimple first_load;
305 /* For every stmt in NODE find its def stmt/s. */
306 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
308 if (vect_print_dump_info (REPORT_SLP))
310 fprintf (vect_dump, "Build SLP for ");
311 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
314 lhs = gimple_get_lhs (stmt);
315 if (lhs == NULL_TREE)
317 if (vect_print_dump_info (REPORT_SLP))
319 fprintf (vect_dump,
320 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
321 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
324 return false;
327 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
328 vectype = get_vectype_for_scalar_type (scalar_type);
329 if (!vectype)
331 if (vect_print_dump_info (REPORT_SLP))
333 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
334 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
336 return false;
339 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
340 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
341 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
342 if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
343 fprintf (vect_dump, "SLP with multiple types ");
345 /* In case of multiple types we need to detect the smallest type. */
346 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
347 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
349 if (is_gimple_call (stmt))
350 rhs_code = CALL_EXPR;
351 else
352 rhs_code = gimple_assign_rhs_code (stmt);
354 /* Check the operation. */
355 if (i == 0)
357 first_stmt_code = rhs_code;
359 /* Shift arguments should be equal in all the packed stmts for a
360 vector shift with scalar shift operand. */
361 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
362 || rhs_code == LROTATE_EXPR
363 || rhs_code == RROTATE_EXPR)
365 vec_mode = TYPE_MODE (vectype);
367 /* First see if we have a vector/vector shift. */
368 optab = optab_for_tree_code (rhs_code, vectype,
369 optab_vector);
371 if (!optab
372 || (optab->handlers[(int) vec_mode].insn_code
373 == CODE_FOR_nothing))
375 /* No vector/vector shift, try for a vector/scalar shift. */
376 optab = optab_for_tree_code (rhs_code, vectype,
377 optab_scalar);
379 if (!optab)
381 if (vect_print_dump_info (REPORT_SLP))
382 fprintf (vect_dump, "Build SLP failed: no optab.");
383 return false;
385 icode = (int) optab->handlers[(int) vec_mode].insn_code;
386 if (icode == CODE_FOR_nothing)
388 if (vect_print_dump_info (REPORT_SLP))
389 fprintf (vect_dump, "Build SLP failed: "
390 "op not supported by target.");
391 return false;
393 optab_op2_mode = insn_data[icode].operand[2].mode;
394 if (!VECTOR_MODE_P (optab_op2_mode))
396 need_same_oprnds = true;
397 first_op1 = gimple_assign_rhs2 (stmt);
402 else
404 if (first_stmt_code != rhs_code
405 && (first_stmt_code != IMAGPART_EXPR
406 || rhs_code != REALPART_EXPR)
407 && (first_stmt_code != REALPART_EXPR
408 || rhs_code != IMAGPART_EXPR))
410 if (vect_print_dump_info (REPORT_SLP))
412 fprintf (vect_dump,
413 "Build SLP failed: different operation in stmt ");
414 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
417 return false;
420 if (need_same_oprnds
421 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
423 if (vect_print_dump_info (REPORT_SLP))
425 fprintf (vect_dump,
426 "Build SLP failed: different shift arguments in ");
427 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
430 return false;
434 /* Strided store or load. */
435 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
437 if (REFERENCE_CLASS_P (lhs))
439 /* Store. */
440 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
441 &def_stmts0, &def_stmts1,
442 &first_stmt_dt0,
443 &first_stmt_dt1,
444 &first_stmt_def0_type,
445 &first_stmt_def1_type,
446 &first_stmt_const_oprnd,
447 ncopies_for_cost,
448 &pattern0, &pattern1))
449 return false;
451 else
453 /* Load. */
454 /* FORNOW: Check that there is no gap between the loads. */
455 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
456 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
457 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
458 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
460 if (vect_print_dump_info (REPORT_SLP))
462 fprintf (vect_dump, "Build SLP failed: strided "
463 "loads have gaps ");
464 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
467 return false;
470 /* Check that the size of interleaved loads group is not
471 greater than the SLP group size. */
472 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
473 > ncopies * group_size)
475 if (vect_print_dump_info (REPORT_SLP))
477 fprintf (vect_dump, "Build SLP failed: the number of "
478 "interleaved loads is greater than"
479 " the SLP group size ");
480 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
483 return false;
486 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
488 if (first_load == stmt)
490 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
491 if (vect_supportable_dr_alignment (first_dr)
492 == dr_unaligned_unsupported)
494 if (vect_print_dump_info (REPORT_SLP))
496 fprintf (vect_dump, "Build SLP failed: unsupported "
497 "unaligned load ");
498 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
501 return false;
504 /* Analyze costs (for the first stmt in the group). */
505 vect_model_load_cost (vinfo_for_stmt (stmt),
506 ncopies_for_cost, *node);
509 /* Store the place of this load in the interleaving chain. In
510 case that permutation is needed we later decide if a specific
511 permutation is supported. */
512 load_place = vect_get_place_in_interleaving_chain (stmt,
513 first_load);
514 if (load_place != i)
515 permutation = true;
517 VEC_safe_push (int, heap, *load_permutation, load_place);
519 /* We stop the tree when we reach a group of loads. */
520 stop_recursion = true;
521 continue;
523 } /* Strided access. */
524 else
526 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
528 /* Not strided load. */
529 if (vect_print_dump_info (REPORT_SLP))
531 fprintf (vect_dump, "Build SLP failed: not strided load ");
532 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
535 /* FORNOW: Not strided loads are not supported. */
536 return false;
539 /* Not memory operation. */
540 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
541 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
543 if (vect_print_dump_info (REPORT_SLP))
545 fprintf (vect_dump, "Build SLP failed: operation");
546 fprintf (vect_dump, " unsupported ");
547 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
550 return false;
553 /* Find the def-stmts. */
554 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
555 &def_stmts0, &def_stmts1,
556 &first_stmt_dt0, &first_stmt_dt1,
557 &first_stmt_def0_type,
558 &first_stmt_def1_type,
559 &first_stmt_const_oprnd,
560 ncopies_for_cost,
561 &pattern0, &pattern1))
562 return false;
566 /* Add the costs of the node to the overall instance costs. */
567 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
568 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
570 /* Strided loads were reached - stop the recursion. */
571 if (stop_recursion)
573 if (permutation)
575 VEC_safe_push (slp_tree, heap, *loads, *node);
576 *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
579 return true;
582 /* Create SLP_TREE nodes for the definition node/s. */
583 if (first_stmt_dt0 == vect_loop_def)
585 slp_tree left_node = XNEW (struct _slp_tree);
586 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
587 SLP_TREE_VEC_STMTS (left_node) = NULL;
588 SLP_TREE_LEFT (left_node) = NULL;
589 SLP_TREE_RIGHT (left_node) = NULL;
590 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
591 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
592 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
593 inside_cost, outside_cost, ncopies_for_cost,
594 max_nunits, load_permutation, loads))
595 return false;
597 SLP_TREE_LEFT (*node) = left_node;
600 if (first_stmt_dt1 == vect_loop_def)
602 slp_tree right_node = XNEW (struct _slp_tree);
603 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
604 SLP_TREE_VEC_STMTS (right_node) = NULL;
605 SLP_TREE_LEFT (right_node) = NULL;
606 SLP_TREE_RIGHT (right_node) = NULL;
607 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
608 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
609 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
610 inside_cost, outside_cost, ncopies_for_cost,
611 max_nunits, load_permutation, loads))
612 return false;
614 SLP_TREE_RIGHT (*node) = right_node;
617 return true;
621 static void
622 vect_print_slp_tree (slp_tree node)
624 int i;
625 gimple stmt;
627 if (!node)
628 return;
630 fprintf (vect_dump, "node ");
631 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
633 fprintf (vect_dump, "\n\tstmt %d ", i);
634 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
636 fprintf (vect_dump, "\n");
638 vect_print_slp_tree (SLP_TREE_LEFT (node));
639 vect_print_slp_tree (SLP_TREE_RIGHT (node));
643 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
644 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
645 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
646 stmts in NODE are to be marked. */
648 static void
649 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
651 int i;
652 gimple stmt;
654 if (!node)
655 return;
657 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
658 if (j < 0 || i == j)
659 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
661 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
662 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
666 /* Check if the permutation required by the SLP INSTANCE is supported.
667 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
669 static bool
670 vect_supported_slp_permutation_p (slp_instance instance)
672 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
673 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
674 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
675 VEC (slp_tree, heap) *sorted_loads = NULL;
676 int index;
677 slp_tree *tmp_loads = NULL;
678 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
679 slp_tree load;
681 /* FORNOW: The only supported loads permutation is loads from the same
682 location in all the loads in the node, when the data-refs in
683 nodes of LOADS constitute an interleaving chain.
684 Sort the nodes according to the order of accesses in the chain. */
685 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
686 for (i = 0, j = 0;
687 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
688 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
689 i += group_size, j++)
691 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
692 /* Check that the loads are all in the same interleaving chain. */
693 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
695 if (vect_print_dump_info (REPORT_DETAILS))
697 fprintf (vect_dump, "Build SLP failed: unsupported data "
698 "permutation ");
699 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
702 free (tmp_loads);
703 return false;
706 tmp_loads[index] = load;
709 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
710 for (i = 0; i < group_size; i++)
711 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
713 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
714 SLP_INSTANCE_LOADS (instance) = sorted_loads;
715 free (tmp_loads);
717 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
718 SLP_INSTANCE_UNROLLING_FACTOR (instance),
719 instance, true))
720 return false;
722 return true;
726 /* Check if the required load permutation is supported.
727 LOAD_PERMUTATION contains a list of indices of the loads.
728 In SLP this permutation is relative to the order of strided stores that are
729 the base of the SLP instance. */
731 static bool
732 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
733 VEC (int, heap) *load_permutation)
735 int i = 0, j, prev = -1, next, k;
736 bool supported;
738 /* FORNOW: permutations are only supported for loop-aware SLP. */
739 if (!slp_instn)
740 return false;
742 if (vect_print_dump_info (REPORT_SLP))
744 fprintf (vect_dump, "Load permutation ");
745 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
746 fprintf (vect_dump, "%d ", next);
749 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
750 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
751 well. */
752 if (VEC_length (int, load_permutation)
753 != (unsigned int) (group_size * group_size))
754 return false;
756 supported = true;
757 for (j = 0; j < group_size; j++)
759 for (i = j * group_size, k = 0;
760 VEC_iterate (int, load_permutation, i, next) && k < group_size;
761 i++, k++)
763 if (i != j * group_size && next != prev)
765 supported = false;
766 break;
769 prev = next;
773 if (supported && i == group_size * group_size
774 && vect_supported_slp_permutation_p (slp_instn))
775 return true;
777 return false;
781 /* Find the first load in the loop that belongs to INSTANCE.
782 When loads are in several SLP nodes, there can be a case in which the first
783 load does not appear in the first SLP node to be transformed, causing
784 incorrect order of statements. Since we generate all the loads together,
785 they must be inserted before the first load of the SLP instance and not
786 before the first load of the first node of the instance. */
787 static gimple
788 vect_find_first_load_in_slp_instance (slp_instance instance)
790 int i, j;
791 slp_tree load_node;
792 gimple first_load = NULL, load;
794 for (i = 0;
795 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
796 i++)
797 for (j = 0;
798 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
799 j++)
800 first_load = get_earlier_stmt (load, first_load);
802 return first_load;
806 /* Analyze an SLP instance starting from a group of strided stores. Call
807 vect_build_slp_tree to build a tree of packed stmts if possible.
808 Return FALSE if it's impossible to SLP any stmt in the loop. */
810 static bool
811 vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
813 slp_instance new_instance;
814 slp_tree node = XNEW (struct _slp_tree);
815 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
816 unsigned int unrolling_factor = 1, nunits;
817 tree vectype, scalar_type;
818 gimple next;
819 unsigned int vectorization_factor = 0, ncopies;
820 bool slp_impossible = false;
821 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
822 unsigned int max_nunits = 0;
823 VEC (int, heap) *load_permutation;
824 VEC (slp_tree, heap) *loads;
826 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
827 vinfo_for_stmt (stmt))));
828 vectype = get_vectype_for_scalar_type (scalar_type);
829 if (!vectype)
831 if (vect_print_dump_info (REPORT_SLP))
833 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
834 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
836 return false;
839 nunits = TYPE_VECTOR_SUBPARTS (vectype);
840 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
841 ncopies = vectorization_factor / nunits;
843 /* Create a node (a root of the SLP tree) for the packed strided stores. */
844 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
845 next = stmt;
846 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
847 while (next)
849 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
850 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
853 SLP_TREE_VEC_STMTS (node) = NULL;
854 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
855 SLP_TREE_LEFT (node) = NULL;
856 SLP_TREE_RIGHT (node) = NULL;
857 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
858 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
860 /* Calculate the unrolling factor. */
861 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
863 /* Calculate the number of vector stmts to create based on the unrolling
864 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
865 GROUP_SIZE / NUNITS otherwise. */
866 ncopies_for_cost = unrolling_factor * group_size / nunits;
868 load_permutation = VEC_alloc (int, heap, group_size * group_size);
869 loads = VEC_alloc (slp_tree, heap, group_size);
871 /* Build the tree for the SLP instance. */
872 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
873 &outside_cost, ncopies_for_cost, &max_nunits,
874 &load_permutation, &loads))
876 /* Create a new SLP instance. */
877 new_instance = XNEW (struct _slp_instance);
878 SLP_INSTANCE_TREE (new_instance) = node;
879 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
880 /* Calculate the unrolling factor based on the smallest type in the
881 loop. */
882 if (max_nunits > nunits)
883 unrolling_factor = least_common_multiple (max_nunits, group_size)
884 / group_size;
886 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
887 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
888 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
889 SLP_INSTANCE_LOADS (new_instance) = loads;
890 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
891 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
892 if (VEC_length (slp_tree, loads))
894 if (!vect_supported_load_permutation_p (new_instance, group_size,
895 load_permutation))
897 if (vect_print_dump_info (REPORT_SLP))
899 fprintf (vect_dump, "Build SLP failed: unsupported load "
900 "permutation ");
901 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
904 vect_free_slp_instance (new_instance);
905 return false;
908 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
909 = vect_find_first_load_in_slp_instance (new_instance);
911 else
912 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
914 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
915 new_instance);
916 if (vect_print_dump_info (REPORT_SLP))
917 vect_print_slp_tree (node);
919 return true;
922 /* Failed to SLP. */
923 /* Free the allocated memory. */
924 vect_free_slp_tree (node);
925 VEC_free (int, heap, load_permutation);
926 VEC_free (slp_tree, heap, loads);
928 if (slp_impossible)
929 return false;
931 /* SLP failed for this instance, but it is still possible to SLP other stmts
932 in the loop. */
933 return true;
937 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
938 trees of packed scalar stmts if SLP is possible. */
940 bool
941 vect_analyze_slp (loop_vec_info loop_vinfo)
943 unsigned int i;
944 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
945 gimple store;
947 if (vect_print_dump_info (REPORT_SLP))
948 fprintf (vect_dump, "=== vect_analyze_slp ===");
950 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
951 if (!vect_analyze_slp_instance (loop_vinfo, store))
953 /* SLP failed. No instance can be SLPed in the loop. */
954 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
955 fprintf (vect_dump, "SLP failed.");
957 return false;
960 return true;
964 /* For each possible SLP instance decide whether to SLP it and calculate overall
965 unrolling factor needed to SLP the loop. */
967 void
968 vect_make_slp_decision (loop_vec_info loop_vinfo)
970 unsigned int i, unrolling_factor = 1;
971 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
972 slp_instance instance;
973 int decided_to_slp = 0;
975 if (vect_print_dump_info (REPORT_SLP))
976 fprintf (vect_dump, "=== vect_make_slp_decision ===");
978 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
980 /* FORNOW: SLP if you can. */
981 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
982 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
984 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
985 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
986 loop-based vectorization. Such stmts will be marked as HYBRID. */
987 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
988 decided_to_slp++;
991 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
993 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
994 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
995 decided_to_slp, unrolling_factor);
999 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1000 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1002 static void
1003 vect_detect_hybrid_slp_stmts (slp_tree node)
1005 int i;
1006 gimple stmt;
1007 imm_use_iterator imm_iter;
1008 gimple use_stmt;
1010 if (!node)
1011 return;
1013 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1014 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1015 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1016 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1017 if (vinfo_for_stmt (use_stmt)
1018 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
1019 && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
1020 vect_mark_slp_stmts (node, hybrid, i);
1022 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1023 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1027 /* Find stmts that must be both vectorized and SLPed. */
1029 void
1030 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1032 unsigned int i;
1033 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1034 slp_instance instance;
1036 if (vect_print_dump_info (REPORT_SLP))
1037 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1039 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1040 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1043 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1044 the number of created vector stmts depends on the unrolling factor). However,
1045 the actual number of vector stmts for every SLP node depends on VF which is
1046 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1047 In this function we assume that the inside costs calculated in
1048 vect_model_xxx_cost are linear in ncopies. */
1050 void
1051 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1053 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1054 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1055 slp_instance instance;
1057 if (vect_print_dump_info (REPORT_SLP))
1058 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1060 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1061 /* We assume that costs are linear in ncopies. */
1062 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1063 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1066 /* For constant and loop invariant defs of SLP_NODE this function returns
1067 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1068 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1069 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1071 static void
1072 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1073 unsigned int op_num, unsigned int number_of_vectors)
1075 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1076 gimple stmt = VEC_index (gimple, stmts, 0);
1077 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1078 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1079 int nunits;
1080 tree vec_cst;
1081 tree t = NULL_TREE;
1082 int j, number_of_places_left_in_vector;
1083 tree vector_type;
1084 tree op, vop;
1085 int group_size = VEC_length (gimple, stmts);
1086 unsigned int vec_num, i;
1087 int number_of_copies = 1;
1088 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1089 bool constant_p, is_store;
1091 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1093 is_store = true;
1094 op = gimple_assign_rhs1 (stmt);
1096 else
1098 is_store = false;
1099 op = gimple_op (stmt, op_num + 1);
1102 if (CONSTANT_CLASS_P (op))
1104 vector_type = vectype;
1105 constant_p = true;
1107 else
1109 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1110 gcc_assert (vector_type);
1111 constant_p = false;
1114 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1116 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1117 created vectors. It is greater than 1 if unrolling is performed.
1119 For example, we have two scalar operands, s1 and s2 (e.g., group of
1120 strided accesses of size two), while NUNITS is four (i.e., four scalars
1121 of this type can be packed in a vector). The output vector will contain
1122 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1123 will be 2).
1125 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1126 containing the operands.
1128 For example, NUNITS is four as before, and the group size is 8
1129 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1130 {s5, s6, s7, s8}. */
1132 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1134 number_of_places_left_in_vector = nunits;
1135 for (j = 0; j < number_of_copies; j++)
1137 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1139 if (is_store)
1140 op = gimple_assign_rhs1 (stmt);
1141 else
1142 op = gimple_op (stmt, op_num + 1);
1144 /* Create 'vect_ = {op0,op1,...,opn}'. */
1145 t = tree_cons (NULL_TREE, op, t);
1147 number_of_places_left_in_vector--;
1149 if (number_of_places_left_in_vector == 0)
1151 number_of_places_left_in_vector = nunits;
1153 if (constant_p)
1154 vec_cst = build_vector (vector_type, t);
1155 else
1156 vec_cst = build_constructor_from_list (vector_type, t);
1157 VEC_quick_push (tree, voprnds,
1158 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1159 t = NULL_TREE;
1164 /* Since the vectors are created in the reverse order, we should invert
1165 them. */
1166 vec_num = VEC_length (tree, voprnds);
1167 for (j = vec_num - 1; j >= 0; j--)
1169 vop = VEC_index (tree, voprnds, j);
1170 VEC_quick_push (tree, *vec_oprnds, vop);
1173 VEC_free (tree, heap, voprnds);
1175 /* In case that VF is greater than the unrolling factor needed for the SLP
1176 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1177 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1178 to replicate the vectors. */
1179 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1181 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1182 VEC_quick_push (tree, *vec_oprnds, vop);
1187 /* Get vectorized definitions from SLP_NODE that contains corresponding
1188 vectorized def-stmts. */
1190 static void
1191 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1193 tree vec_oprnd;
1194 gimple vec_def_stmt;
1195 unsigned int i;
1197 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1199 for (i = 0;
1200 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1201 i++)
1203 gcc_assert (vec_def_stmt);
1204 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1205 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1210 /* Get vectorized definitions for SLP_NODE.
1211 If the scalar definitions are loop invariants or constants, collect them and
1212 call vect_get_constant_vectors() to create vector stmts.
1213 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1214 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1215 vect_get_slp_vect_defs() to retrieve them.
1216 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1217 the right node. This is used when the second operand must remain scalar. */
1219 void
1220 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1221 VEC (tree,heap) **vec_oprnds1)
1223 gimple first_stmt;
1224 enum tree_code code;
1225 int number_of_vects;
1226 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1228 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1229 /* The number of vector defs is determined by the number of vector statements
1230 in the node from which we get those statements. */
1231 if (SLP_TREE_LEFT (slp_node))
1232 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1233 else
1235 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1236 /* Number of vector stmts was calculated according to LHS in
1237 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1238 necessary. See vect_get_smallest_scalar_type() for details. */
1239 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1240 &rhs_size_unit);
1241 if (rhs_size_unit != lhs_size_unit)
1243 number_of_vects *= rhs_size_unit;
1244 number_of_vects /= lhs_size_unit;
1248 /* Allocate memory for vectorized defs. */
1249 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1251 /* SLP_NODE corresponds either to a group of stores or to a group of
1252 unary/binary operations. We don't call this function for loads. */
1253 if (SLP_TREE_LEFT (slp_node))
1254 /* The defs are already vectorized. */
1255 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1256 else
1257 /* Build vectors from scalar defs. */
1258 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1260 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1261 /* Since we don't call this function with loads, this is a group of
1262 stores. */
1263 return;
1265 code = gimple_assign_rhs_code (first_stmt);
1266 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1267 return;
1269 /* The number of vector defs is determined by the number of vector statements
1270 in the node from which we get those statements. */
1271 if (SLP_TREE_RIGHT (slp_node))
1272 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1273 else
1274 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1276 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1278 if (SLP_TREE_RIGHT (slp_node))
1279 /* The defs are already vectorized. */
1280 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1281 else
1282 /* Build vectors from scalar defs. */
1283 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1286 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1287 building a vector of type MASK_TYPE from it) and two input vectors placed in
1288 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1289 shifting by STRIDE elements of DR_CHAIN for every copy.
1290 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1291 copies).
1292 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1293 the created stmts must be inserted. */
1295 static inline void
1296 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1297 int *mask_array, int mask_nunits,
1298 tree mask_element_type, tree mask_type,
1299 int first_vec_indx, int second_vec_indx,
1300 gimple_stmt_iterator *gsi, slp_tree node,
1301 tree builtin_decl, tree vectype,
1302 VEC(tree,heap) *dr_chain,
1303 int ncopies, int vect_stmts_counter)
1305 tree t = NULL_TREE, mask_vec, mask, perm_dest;
1306 gimple perm_stmt = NULL;
1307 stmt_vec_info next_stmt_info;
1308 int i, group_size, stride, dr_chain_size;
1309 tree first_vec, second_vec, data_ref;
1310 VEC (tree, heap) *params = NULL;
1312 /* Create a vector mask. */
1313 for (i = mask_nunits - 1; i >= 0; --i)
1314 t = tree_cons (NULL_TREE, build_int_cst (mask_element_type, mask_array[i]),
1316 mask_vec = build_vector (mask_type, t);
1317 mask = vect_init_vector (stmt, mask_vec, mask_type, NULL);
1319 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node));
1320 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1321 dr_chain_size = VEC_length (tree, dr_chain);
1323 /* Initialize the vect stmts of NODE to properly insert the generated
1324 stmts later. */
1325 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1326 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1327 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1329 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1330 for (i = 0; i < ncopies; i++)
1332 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1333 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1335 /* Build argument list for the vectorized call. */
1336 VEC_free (tree, heap, params);
1337 params = VEC_alloc (tree, heap, 3);
1338 VEC_quick_push (tree, params, first_vec);
1339 VEC_quick_push (tree, params, second_vec);
1340 VEC_quick_push (tree, params, mask);
1342 /* Generate the permute statement. */
1343 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1344 data_ref = make_ssa_name (perm_dest, perm_stmt);
1345 gimple_call_set_lhs (perm_stmt, data_ref);
1346 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1348 /* Store the vector statement in NODE. */
1349 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1350 stride * i + vect_stmts_counter, perm_stmt);
1352 first_vec_indx += stride;
1353 second_vec_indx += stride;
1356 /* Mark the scalar stmt as vectorized. */
1357 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1358 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1362 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1363 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1364 representation. Check that the mask is valid and return FALSE if not.
1365 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1366 the next vector, i.e., the current first vector is not needed. */
1368 static bool
1369 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1370 int mask_nunits, bool only_one_vec, int index,
1371 int *mask, int *current_mask_element,
1372 bool *need_next_vector)
1374 int i;
1375 static int number_of_mask_fixes = 1;
1376 static bool mask_fixed = false;
1377 static bool needs_first_vector = false;
1379 /* Convert to target specific representation. */
1380 *current_mask_element = first_mask_element + m;
1381 /* Adjust the value in case it's a mask for second and third vectors. */
1382 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1384 if (*current_mask_element < mask_nunits)
1385 needs_first_vector = true;
1387 /* We have only one input vector to permute but the mask accesses values in
1388 the next vector as well. */
1389 if (only_one_vec && *current_mask_element >= mask_nunits)
1391 if (vect_print_dump_info (REPORT_DETAILS))
1393 fprintf (vect_dump, "permutation requires at least two vectors ");
1394 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1397 return false;
1400 /* The mask requires the next vector. */
1401 if (*current_mask_element >= mask_nunits * 2)
1403 if (needs_first_vector || mask_fixed)
1405 /* We either need the first vector too or have already moved to the
1406 next vector. In both cases, this permutation needs three
1407 vectors. */
1408 if (vect_print_dump_info (REPORT_DETAILS))
1410 fprintf (vect_dump, "permutation requires at "
1411 "least three vectors ");
1412 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1415 return false;
1418 /* We move to the next vector, dropping the first one and working with
1419 the second and the third - we need to adjust the values of the mask
1420 accordingly. */
1421 *current_mask_element -= mask_nunits * number_of_mask_fixes;
1423 for (i = 0; i < index; i++)
1424 mask[i] -= mask_nunits * number_of_mask_fixes;
1426 (number_of_mask_fixes)++;
1427 mask_fixed = true;
1430 *need_next_vector = mask_fixed;
1432 /* This was the last element of this mask. Start a new one. */
1433 if (index == mask_nunits - 1)
1435 number_of_mask_fixes = 1;
1436 mask_fixed = false;
1437 needs_first_vector = false;
1440 return true;
1444 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1445 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1446 permute statements for SLP_NODE_INSTANCE. */
1447 bool
1448 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1449 gimple_stmt_iterator *gsi, int vf,
1450 slp_instance slp_node_instance, bool analyze_only)
1452 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1453 tree mask_element_type = NULL_TREE, mask_type;
1454 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1455 slp_tree node;
1456 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1457 gimple next_scalar_stmt;
1458 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1459 int first_mask_element;
1460 int index, unroll_factor, *mask, current_mask_element, ncopies;
1461 bool only_one_vec = false, need_next_vector = false;
1462 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1464 if (!targetm.vectorize.builtin_vec_perm)
1466 if (vect_print_dump_info (REPORT_DETAILS))
1468 fprintf (vect_dump, "no builtin for vect permute for ");
1469 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1472 return false;
1475 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1476 &mask_element_type);
1477 if (!builtin_decl || !mask_element_type)
1479 if (vect_print_dump_info (REPORT_DETAILS))
1481 fprintf (vect_dump, "no builtin for vect permute for ");
1482 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1485 return false;
1488 mask_type = get_vectype_for_scalar_type (mask_element_type);
1489 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1490 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1491 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1492 scale = mask_nunits / nunits;
1493 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1495 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1496 unrolling factor. */
1497 orig_vec_stmts_num = group_size *
1498 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1499 if (orig_vec_stmts_num == 1)
1500 only_one_vec = true;
1502 /* Number of copies is determined by the final vectorization factor
1503 relatively to SLP_NODE_INSTANCE unrolling factor. */
1504 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1506 /* Generate permutation masks for every NODE. Number of masks for each NODE
1507 is equal to GROUP_SIZE.
1508 E.g., we have a group of three nodes with three loads from the same
1509 location in each node, and the vector size is 4. I.e., we have a
1510 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1511 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1512 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1515 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1516 scpecific type, e.g., in bytes for Altivec.
1517 The last mask is illegal since we assume two operands for permute
1518 operation, and the mask element values can't be outside that range. Hence,
1519 the last mask must be converted into {2,5,5,5}.
1520 For the first two permutations we need the first and the second input
1521 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1522 we need the second and the third vectors: {b1,c1,a2,b2} and
1523 {c2,a3,b3,c3}. */
1525 for (i = 0;
1526 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1527 i, node);
1528 i++)
1530 scalar_index = 0;
1531 index = 0;
1532 vect_stmts_counter = 0;
1533 vec_index = 0;
1534 first_vec_index = vec_index++;
1535 if (only_one_vec)
1536 second_vec_index = first_vec_index;
1537 else
1538 second_vec_index = vec_index++;
1540 for (j = 0; j < unroll_factor; j++)
1542 for (k = 0; k < group_size; k++)
1544 first_mask_element = (i + j * group_size) * scale;
1545 for (m = 0; m < scale; m++)
1547 if (!vect_get_mask_element (stmt, first_mask_element, m,
1548 mask_nunits, only_one_vec, index, mask,
1549 &current_mask_element, &need_next_vector))
1550 return false;
1552 mask[index++] = current_mask_element;
1555 if (index == mask_nunits)
1557 index = 0;
1558 if (!analyze_only)
1560 if (need_next_vector)
1562 first_vec_index = second_vec_index;
1563 second_vec_index = vec_index;
1566 next_scalar_stmt = VEC_index (gimple,
1567 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1569 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1570 mask, mask_nunits, mask_element_type, mask_type,
1571 first_vec_index, second_vec_index, gsi, node,
1572 builtin_decl, vectype, dr_chain, ncopies,
1573 vect_stmts_counter++);
1580 free (mask);
1581 return true;
1586 /* Vectorize SLP instance tree in postorder. */
1588 static bool
1589 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1590 unsigned int vectorization_factor)
1592 gimple stmt;
1593 bool strided_store, is_store;
1594 gimple_stmt_iterator si;
1595 stmt_vec_info stmt_info;
1596 unsigned int vec_stmts_size, nunits, group_size;
1597 tree vectype;
1598 int i;
1599 slp_tree loads_node;
1601 if (!node)
1602 return false;
1604 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1605 vectorization_factor);
1606 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1607 vectorization_factor);
1609 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1610 stmt_info = vinfo_for_stmt (stmt);
1612 /* VECTYPE is the type of the destination. */
1613 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1614 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1615 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1617 /* For each SLP instance calculate number of vector stmts to be created
1618 for the scalar stmts in each node of the SLP tree. Number of vector
1619 elements in one vector iteration is the number of scalar elements in
1620 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1621 size. */
1622 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1624 /* In case of load permutation we have to allocate vectorized statements for
1625 all the nodes that participate in that permutation. */
1626 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1628 for (i = 0;
1629 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1630 i++)
1632 if (!SLP_TREE_VEC_STMTS (loads_node))
1634 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
1635 vec_stmts_size);
1636 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
1641 if (!SLP_TREE_VEC_STMTS (node))
1643 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
1644 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
1647 if (vect_print_dump_info (REPORT_DETAILS))
1649 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
1650 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1653 /* Loads should be inserted before the first load. */
1654 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
1655 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
1656 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
1657 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
1658 else
1659 si = gsi_for_stmt (stmt);
1661 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
1662 if (is_store)
1664 if (DR_GROUP_FIRST_DR (stmt_info))
1665 /* If IS_STORE is TRUE, the vectorization of the
1666 interleaving chain was completed - free all the stores in
1667 the chain. */
1668 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
1669 else
1670 /* FORNOW: SLP originates only from strided stores. */
1671 gcc_unreachable ();
1673 return true;
1676 /* FORNOW: SLP originates only from strided stores. */
1677 return false;
1681 bool
1682 vect_schedule_slp (loop_vec_info loop_vinfo)
1684 VEC (slp_instance, heap) *slp_instances =
1685 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1686 slp_instance instance;
1687 unsigned int i;
1688 bool is_store = false;
1690 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1692 /* Schedule the tree of INSTANCE. */
1693 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
1694 instance, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1696 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
1697 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1698 fprintf (vect_dump, "vectorizing stmts using SLP.");
1701 return is_store;