* loop-iv.c (replace_single_def_regs, replace_in_expr): New static
[official-gcc/alias-decl.git] / gcc / tree-vect-slp.c
blobfe01a76beea621e4069dda94c00a59dbddec44fc
1 /* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "tree-vectorizer.h"
41 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
43 static void
44 vect_free_slp_tree (slp_tree node)
46 if (!node)
47 return;
49 if (SLP_TREE_LEFT (node))
50 vect_free_slp_tree (SLP_TREE_LEFT (node));
52 if (SLP_TREE_RIGHT (node))
53 vect_free_slp_tree (SLP_TREE_RIGHT (node));
55 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
57 if (SLP_TREE_VEC_STMTS (node))
58 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
60 free (node);
64 /* Free the memory allocated for the SLP instance. */
66 void
67 vect_free_slp_instance (slp_instance instance)
69 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
70 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
71 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).

   LOOP_VINFO - vectorization info of the enclosing loop.
   SLP_NODE   - the SLP node STMT belongs to; stmt 0 of its scalar stmts is
                the group leader whose recorded info all later stmts must
                match.
   DEF_STMTS0/DEF_STMTS1 - accumulate the defining stmts of operand 0/1 for
                loop-defined operands; these later become child SLP nodes.
   FIRST_STMT_DT0/DT1, FIRST_STMT_DEF0_TYPE/DEF1_TYPE,
   FIRST_STMT_CONST_OPRND - in/out: info recorded from the group leader and
                compared against for every subsequent stmt.
   NCOPIES_FOR_COST - multiplier for the cost model (leader only).
   PATTERN0/PATTERN1 - in/out: whether operand 0/1 of the group comes from a
                recognized pattern; all stmts must agree.

   Returns false (rejecting the SLP group) on any mismatch or unsupported
   def type.  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
			     gimple stmt, VEC (gimple, heap) **def_stmts0,
			     VEC (gimple, heap) **def_stmts1,
			     enum vect_def_type *first_stmt_dt0,
			     enum vect_def_type *first_stmt_dt1,
			     tree *first_stmt_def0_type,
			     tree *first_stmt_def1_type,
			     tree *first_stmt_const_oprnd,
			     int ncopies_for_cost,
			     bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      /* A missing def stmt is only acceptable for constants.  */
      if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
	  || (!def_stmt && dt[i] != vect_constant_def))
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: can't find def for ");
	      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
	    }

	  return false;
	}

      /* Check if DEF_STMT is a part of a pattern and get the def stmt from
         the pattern. Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
	{
	  /* NOTE(review): *first_stmt_dt0 == 0 is used as "not yet set";
	     this relies on vect_unknown_def_type being 0 — verify against
	     the enum in tree-vectorizer.h.  */
	  if (!*first_stmt_dt0)
	    *pattern0 = true;
	  else
	    {
	      if (i == 1 && !*first_stmt_dt1)
		*pattern1 = true;
	      else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
		{
		  /* Some stmts of the group are in a pattern and this one is
		     not (or vice versa) - mixing is not supported.  */
		  if (vect_print_dump_info (REPORT_DETAILS))
		    {
		      fprintf (vect_dump, "Build SLP failed: some of the stmts"
			       " are in a pattern, and others are not ");
		      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
		    }

		  return false;
		}
	    }

	  /* Follow the pattern: analyze the related stmt instead.  */
	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (*dt == vect_unknown_def_type)
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "Unsupported pattern.");
	      return false;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	      def = gimple_phi_result (def_stmt);
	      break;

	    case GIMPLE_ASSIGN:
	      def = gimple_assign_lhs (def_stmt);
	      break;

	    default:
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "unsupported defining stmt: ");
	      return false;
	    }
	}

      if (!*first_stmt_dt0)
	{
	  /* op0 of the first stmt of the group - store its info.  */
	  *first_stmt_dt0 = dt[i];
	  if (def)
	    *first_stmt_def0_type = TREE_TYPE (def);
	  else
	    *first_stmt_const_oprnd = oprnd;

	  /* Analyze costs (for the first stmt of the group only).  */
	  if (rhs_class != GIMPLE_SINGLE_RHS)
	    /* Not memory operation (we don't call this functions for loads).  */
	    vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
	  else
	    /* Store.  */
	    vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
	}
      else
	{
	  if (!*first_stmt_dt1 && i == 1)
	    {
	      /* op1 of the first stmt of the group - store its info.  */
	      *first_stmt_dt1 = dt[i];
	      if (def)
		*first_stmt_def1_type = TREE_TYPE (def);
	      else
		{
		  /* We assume that the stmt contains only one constant
		     operand. We fail otherwise, to be on the safe side.  */
		  if (*first_stmt_const_oprnd)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: two constant "
				 "oprnds in stmt");
		      return false;
		    }
		  *first_stmt_const_oprnd = oprnd;
		}
	    }
	  else
	    {
	      /* Not first stmt of the group, check that the def-stmt/s match
		 the def-stmt/s of the first stmt.  */
	      if ((i == 0
		   && (*first_stmt_dt0 != dt[i]
		       || (*first_stmt_def0_type && def
			   && *first_stmt_def0_type != TREE_TYPE (def))))
		  || (i == 1
		      && (*first_stmt_dt1 != dt[i]
			  || (*first_stmt_def1_type && def
			      && *first_stmt_def1_type != TREE_TYPE (def))))
		  || (!def
		      && TREE_TYPE (*first_stmt_const_oprnd)
		      != TREE_TYPE (oprnd)))
		{
		  if (vect_print_dump_info (REPORT_SLP))
		    fprintf (vect_dump, "Build SLP failed: different types ");

		  return false;
		}
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt[i])
	{
	case vect_constant_def:
	case vect_invariant_def:
	  /* Nothing to recurse into - handled when generating vector
	     constants/invariants.  */
	  break;

	case vect_loop_def:
	  /* Loop-defined operand: record its def stmt so the caller can
	     build a child SLP node from DEF_STMTS0/1.  */
	  if (i == 0)
	    VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
	  else
	    VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: illegal type of def ");
	      print_generic_expr (vect_dump, def, TDF_SLIM);
	    }

	  return false;
	}
    }

  return true;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.

   GROUP_SIZE        - number of scalar stmts packed in each node.
   INSIDE_COST/OUTSIDE_COST - accumulated instance costs (updated here).
   NCOPIES_FOR_COST  - multiplier used by the cost model.
   MAX_NUNITS        - out: largest number of vector subparts seen, used by
                       the caller to recompute the unrolling factor.
   LOAD_PERMUTATION  - out: for each load, its position in its interleaving
                       chain.
   LOADS             - out: the load nodes that need permutation.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
		     unsigned int group_size,
		     int *inside_cost, int *outside_cost,
		     int ncopies_for_cost, unsigned int *max_nunits,
		     VEC (int, heap) **load_permutation,
		     VEC (slp_tree, heap) **loads)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
  enum tree_code first_stmt_code = 0, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int vectorization_factor = 0, ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
	{
	  fprintf (vect_dump, "Build SLP for ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Only assignments and calls have an lhs we can vectorize.  */
      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump,
		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
	    }
	  return false;
	}

      gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
	fprintf (vect_dump, "SLP with multiple types ");

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
	*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
	rhs_code = CALL_EXPR;
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || (optab->handlers[(int) vec_mode].insn_code
		      == CODE_FOR_nothing))
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: no optab.");
		      return false;
		    }
		  icode = (int) optab->handlers[(int) vec_mode].insn_code;
		  if (icode == CODE_FOR_nothing)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: "
				 "op not supported by target.");
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      /* Scalar shift operand: all stmts in the group must
			 shift by the same amount.  */
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	}
      else
	{
	  /* Subsequent stmts must perform the same operation as the first
	     (REALPART/IMAGPART are allowed to mix).  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different operation in stmt ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different shift arguments in ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }
	}

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
						&def_stmts0, &def_stmts1,
						&first_stmt_dt0,
						&first_stmt_dt1,
						&first_stmt_def0_type,
						&first_stmt_def1_type,
						&first_stmt_const_oprnd,
						ncopies_for_cost,
						&pattern0, &pattern1))
		return false;
	    }
	  else
	    {
	      /* Load.  */
	      /* FORNOW: Check that there is no gap between the loads.  */
	      if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
		   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
		  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
		      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
		{
		  if (vect_print_dump_info (REPORT_SLP))
		    {
		      fprintf (vect_dump, "Build SLP failed: strided "
			       "loads have gaps ");
		      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		    }

		  return false;
		}

	      first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));

	      if (first_load == stmt)
		{
		  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
		  if (vect_supportable_dr_alignment (first_dr)
		      == dr_unaligned_unsupported)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			{
			  fprintf (vect_dump, "Build SLP failed: unsupported "
				   "unaligned load ");
			  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
			}

		      return false;
		    }

		  /* Analyze costs (for the first stmt in the group).  */
		  vect_model_load_cost (vinfo_for_stmt (stmt),
					ncopies_for_cost, *node);
		}

	      /* Store the place of this load in the interleaving chain. In
		 case that permutation is needed we later decide if a specific
		 permutation is supported.  */
	      load_place = vect_get_place_in_interleaving_chain (stmt,
								 first_load);
	      if (load_place != i)
		permutation = true;

	      VEC_safe_push (int, heap, *load_permutation, load_place);

	      /* We stop the tree when we reach a group of loads.  */
	      stop_recursion = true;
	      continue;
	    }
	} /* Strided access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not strided load. */
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: not strided load ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      /* FORNOW: Not strided loads are not supported.  */
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary)
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: operation");
		  fprintf (vect_dump, " unsupported ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  /* Find the def-stmts.  */
	  if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
					    &def_stmts0, &def_stmts1,
					    &first_stmt_dt0, &first_stmt_dt1,
					    &first_stmt_def0_type,
					    &first_stmt_def1_type,
					    &first_stmt_const_oprnd,
					    ncopies_for_cost,
					    &pattern0, &pattern1))
	    return false;
	}
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
	{
	  VEC_safe_push (slp_tree, heap, *loads, *node);
	  *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
	}

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_loop_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads))
	return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_loop_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads))
	return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
605 static void
606 vect_print_slp_tree (slp_tree node)
608 int i;
609 gimple stmt;
611 if (!node)
612 return;
614 fprintf (vect_dump, "node ");
615 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
617 fprintf (vect_dump, "\n\tstmt %d ", i);
618 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
620 fprintf (vect_dump, "\n");
622 vect_print_slp_tree (SLP_TREE_LEFT (node));
623 vect_print_slp_tree (SLP_TREE_RIGHT (node));
627 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
628 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
629 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
630 stmts in NODE are to be marked. */
632 static void
633 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
635 int i;
636 gimple stmt;
638 if (!node)
639 return;
641 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
642 if (j < 0 || i == j)
643 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
645 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
646 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.

   Returns true iff every load node belongs to the same interleaving chain
   and the permutation itself is supported by
   vect_transform_slp_perm_load (called here in analysis mode).  On
   success, SLP_INSTANCE_LOADS is replaced by a vector sorted by position
   in the chain.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  /* I advances by GROUP_SIZE through the permutation vector (one entry per
     node is enough, since within a node all indices are equal); J walks the
     load nodes one by one.  */
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "Build SLP failed: unsupported data "
		       "permutation ");
	      print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
	    }

	  free (tmp_loads);
	  return false;
	}

      /* Place the node at its position in the interleaving chain.  */
      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  /* Replace the loads vector with the sorted one.  */
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* ANALYZE_ONLY = true: only check supportability, generate nothing.  */
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
				     SLP_INSTANCE_UNROLLING_FACTOR (instance),
				     instance, true))
    return false;

  return true;
}
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that
   are the base of the SLP instance.

   Returns true iff the permutation has the form 0..01..1.. (each run of
   GROUP_SIZE equal indices) and vect_supported_slp_permutation_p accepts
   it.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
				   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k;
  bool supported;

  /* FORNOW: permutations are only supported for loop-aware SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
	fprintf (vect_dump, "%d ", next);
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well.  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  /* Verify each GROUP_SIZE-long run holds a single repeated index.  */
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
	   VEC_iterate (int, load_permutation, i, next) && k < group_size;
	   i++, k++)
	{
	  /* Within a run, every entry must equal the previous one.  */
	  if (i != j * group_size && next != prev)
	    {
	      supported = false;
	      break;
	    }

	  prev = next;
	}
    }

  /* I == GROUP_SIZE * GROUP_SIZE only if no inner loop broke early.  */
  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
765 /* Find the first load in the loop that belongs to INSTANCE.
766 When loads are in several SLP nodes, there can be a case in which the first
767 load does not appear in the first SLP node to be transformed, causing
768 incorrect order of statements. Since we generate all the loads together,
769 they must be inserted before the first load of the SLP instance and not
770 before the first load of the first node of the instance. */
771 static gimple
772 vect_find_first_load_in_slp_instance (slp_instance instance)
774 int i, j;
775 slp_tree load_node;
776 gimple first_load = NULL, load;
778 for (i = 0;
779 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
780 i++)
781 for (j = 0;
782 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
783 j++)
784 first_load = get_earlier_stmt (load, first_load);
786 return first_load;
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.

   STMT is the first store of an interleaving group.  On success a new
   slp_instance is pushed onto LOOP_VINFO_SLP_INSTANCES and takes ownership
   of the tree, loads and load-permutation vectors; on failure those are
   freed here.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type;
  gimple next;
  unsigned int vectorization_factor = 0, ncopies;
  bool slp_impossible = false;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;

  scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
				     vinfo_for_stmt (stmt))));
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
	{
	  fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
	  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
	}
      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  ncopies = vectorization_factor / nunits;

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
  while (next)
    {
      VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise.  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  /* One permutation entry per load per group member.  */
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
			   &outside_cost, ncopies_for_cost, &max_nunits,
			   &load_permutation, &loads))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
	unrolling_factor = least_common_multiple (max_nunits, group_size)
			   / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
	{
	  if (!vect_supported_load_permutation_p (new_instance, group_size,
						  load_permutation))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: unsupported load "
			   "permutation ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      vect_free_slp_instance (new_instance);
	      return false;
	    }

	  SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
	    = vect_find_first_load_in_slp_instance (new_instance);
	}
      else
	/* No loads, so the permutation vector is never needed.  */
	VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
		     new_instance);
      if (vect_print_dump_info (REPORT_SLP))
	vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  /* NOTE(review): SLP_IMPOSSIBLE is never set in this function, so this
     branch is currently dead — verify against the full file/history.  */
  if (slp_impossible)
    return false;

  /* SLP failed for this instance, but it is still possible to SLP other stmts
     in the loop.  */
  return true;
}
921 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
922 trees of packed scalar stmts if SLP is possible. */
924 bool
925 vect_analyze_slp (loop_vec_info loop_vinfo)
927 unsigned int i;
928 VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
929 gimple store;
931 if (vect_print_dump_info (REPORT_SLP))
932 fprintf (vect_dump, "=== vect_analyze_slp ===");
934 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
935 if (!vect_analyze_slp_instance (loop_vinfo, store))
937 /* SLP failed. No instance can be SLPed in the loop. */
938 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
939 fprintf (vect_dump, "SLP failed.");
941 return false;
944 return true;
948 /* For each possible SLP instance decide whether to SLP it and calculate overall
949 unrolling factor needed to SLP the loop. */
951 void
952 vect_make_slp_decision (loop_vec_info loop_vinfo)
954 unsigned int i, unrolling_factor = 1;
955 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
956 slp_instance instance;
957 int decided_to_slp = 0;
959 if (vect_print_dump_info (REPORT_SLP))
960 fprintf (vect_dump, "=== vect_make_slp_decision ===");
962 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
964 /* FORNOW: SLP if you can. */
965 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
966 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
968 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
969 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
970 loop-based vectorization. Such stmts will be marked as HYBRID. */
971 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
972 decided_to_slp++;
975 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
977 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
978 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
979 decided_to_slp, unrolling_factor);
/* Find stmts in the tree rooted at NODE that must be both vectorized and
   SLPed: a PURE_SLP stmt whose SSA result is used by a relevant stmt that
   is NOT part of any SLP instance feeds the loop-based vectorizer too, so
   it is re-marked as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    /* Only stmts producing an SSA name can have immediate uses to scan.  */
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
	&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      /* A use outside any SLP instance (STMT_SLP_TYPE == 0) that is still
	 relevant to vectorization forces this stmt to be hybrid.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
	if (vinfo_for_stmt (use_stmt)
	    && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
	    && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
	  /* Mark only the stmt at index I in this node as hybrid.  */
	  vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
1011 /* Find stmts that must be both vectorized and SLPed. */
1013 void
1014 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1016 unsigned int i;
1017 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1018 slp_instance instance;
1020 if (vect_print_dump_info (REPORT_SLP))
1021 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1023 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1024 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    /* NOTE(review): VF / UNROLLING_FACTOR is integer division; this assumes
       VF is a multiple of the instance unrolling factor — verify against
       how both are computed.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
1050 /* For constant and loop invariant defs of SLP_NODE this function returns
1051 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1052 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1053 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1055 static void
1056 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1057 unsigned int op_num, unsigned int number_of_vectors)
1059 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1060 gimple stmt = VEC_index (gimple, stmts, 0);
1061 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1062 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1063 int nunits;
1064 tree vec_cst;
1065 tree t = NULL_TREE;
1066 int j, number_of_places_left_in_vector;
1067 tree vector_type;
1068 tree op, vop;
1069 int group_size = VEC_length (gimple, stmts);
1070 unsigned int vec_num, i;
1071 int number_of_copies = 1;
1072 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1073 bool constant_p, is_store;
1075 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1077 is_store = true;
1078 op = gimple_assign_rhs1 (stmt);
1080 else
1082 is_store = false;
1083 op = gimple_op (stmt, op_num + 1);
1086 if (CONSTANT_CLASS_P (op))
1088 vector_type = vectype;
1089 constant_p = true;
1091 else
1093 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1094 gcc_assert (vector_type);
1095 constant_p = false;
1098 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1100 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1101 created vectors. It is greater than 1 if unrolling is performed.
1103 For example, we have two scalar operands, s1 and s2 (e.g., group of
1104 strided accesses of size two), while NUNITS is four (i.e., four scalars
1105 of this type can be packed in a vector). The output vector will contain
1106 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1107 will be 2).
1109 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1110 containing the operands.
1112 For example, NUNITS is four as before, and the group size is 8
1113 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1114 {s5, s6, s7, s8}. */
1116 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1118 number_of_places_left_in_vector = nunits;
1119 for (j = 0; j < number_of_copies; j++)
1121 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1123 if (is_store)
1124 op = gimple_assign_rhs1 (stmt);
1125 else
1126 op = gimple_op (stmt, op_num + 1);
1128 /* Create 'vect_ = {op0,op1,...,opn}'. */
1129 t = tree_cons (NULL_TREE, op, t);
1131 number_of_places_left_in_vector--;
1133 if (number_of_places_left_in_vector == 0)
1135 number_of_places_left_in_vector = nunits;
1137 if (constant_p)
1138 vec_cst = build_vector (vector_type, t);
1139 else
1140 vec_cst = build_constructor_from_list (vector_type, t);
1141 VEC_quick_push (tree, voprnds,
1142 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1143 t = NULL_TREE;
1148 /* Since the vectors are created in the reverse order, we should invert
1149 them. */
1150 vec_num = VEC_length (tree, voprnds);
1151 for (j = vec_num - 1; j >= 0; j--)
1153 vop = VEC_index (tree, voprnds, j);
1154 VEC_quick_push (tree, *vec_oprnds, vop);
1157 VEC_free (tree, heap, voprnds);
1159 /* In case that VF is greater than the unrolling factor needed for the SLP
1160 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1161 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1162 to replicate the vectors. */
1163 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1165 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1166 VEC_quick_push (tree, *vec_oprnds, vop);
1171 /* Get vectorized definitions from SLP_NODE that contains corresponding
1172 vectorized def-stmts. */
1174 static void
1175 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1177 tree vec_oprnd;
1178 gimple vec_def_stmt;
1179 unsigned int i;
1181 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1183 for (i = 0;
1184 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1185 i++)
1187 gcc_assert (vec_def_stmt);
1188 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1189 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1194 /* Get vectorized definitions for SLP_NODE.
1195 If the scalar definitions are loop invariants or constants, collect them and
1196 call vect_get_constant_vectors() to create vector stmts.
1197 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1198 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1199 vect_get_slp_vect_defs() to retrieve them.
1200 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1201 the right node. This is used when the second operand must remain scalar. */
1203 void
1204 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1205 VEC (tree,heap) **vec_oprnds1)
1207 gimple first_stmt;
1208 enum tree_code code;
1209 int number_of_vects;
1210 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1212 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1213 /* The number of vector defs is determined by the number of vector statements
1214 in the node from which we get those statements. */
1215 if (SLP_TREE_LEFT (slp_node))
1216 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1217 else
1219 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1220 /* Number of vector stmts was calculated according to LHS in
1221 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1222 necessary. See vect_get_smallest_scalar_type() for details. */
1223 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1224 &rhs_size_unit);
1225 if (rhs_size_unit != lhs_size_unit)
1227 number_of_vects *= rhs_size_unit;
1228 number_of_vects /= lhs_size_unit;
1232 /* Allocate memory for vectorized defs. */
1233 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1235 /* SLP_NODE corresponds either to a group of stores or to a group of
1236 unary/binary operations. We don't call this function for loads. */
1237 if (SLP_TREE_LEFT (slp_node))
1238 /* The defs are already vectorized. */
1239 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1240 else
1241 /* Build vectors from scalar defs. */
1242 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1244 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1245 /* Since we don't call this function with loads, this is a group of
1246 stores. */
1247 return;
1249 code = gimple_assign_rhs_code (first_stmt);
1250 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1251 return;
1253 /* The number of vector defs is determined by the number of vector statements
1254 in the node from which we get those statements. */
1255 if (SLP_TREE_RIGHT (slp_node))
1256 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1257 else
1258 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1260 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1262 if (SLP_TREE_RIGHT (slp_node))
1263 /* The defs are already vectorized. */
1264 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1265 else
1266 /* Build vectors from scalar defs. */
1267 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1270 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1271 building a vector of type MASK_TYPE from it) and two input vectors placed in
1272 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1273 shifting by STRIDE elements of DR_CHAIN for every copy.
1274 (STRIDE is the number of vectorized stmts for NODE divided by the number of
1275 copies).
1276 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1277 the created stmts must be inserted. */
1279 static inline void
1280 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
1281 int *mask_array, int mask_nunits,
1282 tree mask_element_type, tree mask_type,
1283 int first_vec_indx, int second_vec_indx,
1284 gimple_stmt_iterator *gsi, slp_tree node,
1285 tree builtin_decl, tree vectype,
1286 VEC(tree,heap) *dr_chain,
1287 int ncopies, int vect_stmts_counter)
1289 tree t = NULL_TREE, mask_vec, mask, perm_dest;
1290 gimple perm_stmt = NULL;
1291 stmt_vec_info next_stmt_info;
1292 int i, group_size, stride, dr_chain_size;
1293 tree first_vec, second_vec, data_ref;
1294 tree sym;
1295 ssa_op_iter iter;
1296 VEC (tree, heap) *params = NULL;
1298 /* Create a vector mask. */
1299 for (i = mask_nunits - 1; i >= 0; --i)
1300 t = tree_cons (NULL_TREE, build_int_cst (mask_element_type, mask_array[i]),
1302 mask_vec = build_vector (mask_type, t);
1303 mask = vect_init_vector (stmt, mask_vec, mask_type, NULL);
1305 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node));
1306 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
1307 dr_chain_size = VEC_length (tree, dr_chain);
1309 /* Initialize the vect stmts of NODE to properly insert the generated
1310 stmts later. */
1311 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
1312 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1313 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1315 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1316 for (i = 0; i < ncopies; i++)
1318 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1319 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1321 /* Build argument list for the vectorized call. */
1322 VEC_free (tree, heap, params);
1323 params = VEC_alloc (tree, heap, 3);
1324 VEC_quick_push (tree, params, first_vec);
1325 VEC_quick_push (tree, params, second_vec);
1326 VEC_quick_push (tree, params, mask);
1328 /* Generate the permute statement. */
1329 perm_stmt = gimple_build_call_vec (builtin_decl, params);
1330 data_ref = make_ssa_name (perm_dest, perm_stmt);
1331 gimple_call_set_lhs (perm_stmt, data_ref);
1332 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1333 FOR_EACH_SSA_TREE_OPERAND (sym, perm_stmt, iter, SSA_OP_ALL_VIRTUALS)
1335 if (TREE_CODE (sym) == SSA_NAME)
1336 sym = SSA_NAME_VAR (sym);
1337 mark_sym_for_renaming (sym);
1340 /* Store the vector statement in NODE. */
1341 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
1342 stride * i + vect_stmts_counter, perm_stmt);
1344 first_vec_indx += stride;
1345 second_vec_indx += stride;
1348 /* Mark the scalar stmt as vectorized. */
1349 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1350 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1354 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1355 return in CURRENT_MASK_ELEMENT its equivalent in target specific
1356 representation. Check that the mask is valid and return FALSE if not.
1357 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1358 the next vector, i.e., the current first vector is not needed. */
1360 static bool
1361 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
1362 int mask_nunits, bool only_one_vec, int index,
1363 int *mask, int *current_mask_element,
1364 bool *need_next_vector)
1366 int i;
1367 static int number_of_mask_fixes = 1;
1368 static bool mask_fixed = false;
1369 static bool needs_first_vector = false;
1371 /* Convert to target specific representation. */
1372 *current_mask_element = first_mask_element + m;
1373 /* Adjust the value in case it's a mask for second and third vectors. */
1374 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1376 if (*current_mask_element < mask_nunits)
1377 needs_first_vector = true;
1379 /* We have only one input vector to permute but the mask accesses values in
1380 the next vector as well. */
1381 if (only_one_vec && *current_mask_element >= mask_nunits)
1383 if (vect_print_dump_info (REPORT_DETAILS))
1385 fprintf (vect_dump, "permutation requires at least two vectors ");
1386 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1389 return false;
1392 /* The mask requires the next vector. */
1393 if (*current_mask_element >= mask_nunits * 2)
1395 if (needs_first_vector || mask_fixed)
1397 /* We either need the first vector too or have already moved to the
1398 next vector. In both cases, this permutation needs three
1399 vectors. */
1400 if (vect_print_dump_info (REPORT_DETAILS))
1402 fprintf (vect_dump, "permutation requires at "
1403 "least three vectors ");
1404 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1407 return false;
1410 /* We move to the next vector, dropping the first one and working with
1411 the second and the third - we need to adjust the values of the mask
1412 accordingly. */
1413 *current_mask_element -= mask_nunits * number_of_mask_fixes;
1415 for (i = 0; i < index; i++)
1416 mask[i] -= mask_nunits * number_of_mask_fixes;
1418 (number_of_mask_fixes)++;
1419 mask_fixed = true;
1422 *need_next_vector = mask_fixed;
1424 /* This was the last element of this mask. Start a new one. */
1425 if (index == mask_nunits - 1)
1427 number_of_mask_fixes = 1;
1428 mask_fixed = false;
1429 needs_first_vector = false;
1432 return true;
1436 /* Generate vector permute statements from a list of loads in DR_CHAIN.
1437 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1438 permute statements for SLP_NODE_INSTANCE. */
1439 bool
1440 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1441 gimple_stmt_iterator *gsi, int vf,
1442 slp_instance slp_node_instance, bool analyze_only)
1444 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1445 tree mask_element_type = NULL_TREE, mask_type;
1446 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1447 slp_tree node;
1448 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1449 gimple next_scalar_stmt;
1450 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1451 int first_mask_element;
1452 int index, unroll_factor, *mask, current_mask_element, ncopies;
1453 bool only_one_vec = false, need_next_vector = false;
1454 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1456 if (!targetm.vectorize.builtin_vec_perm)
1458 if (vect_print_dump_info (REPORT_DETAILS))
1460 fprintf (vect_dump, "no builtin for vect permute for ");
1461 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1464 return false;
1467 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1468 &mask_element_type);
1469 if (!builtin_decl || !mask_element_type)
1471 if (vect_print_dump_info (REPORT_DETAILS))
1473 fprintf (vect_dump, "no builtin for vect permute for ");
1474 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1477 return false;
1480 mask_type = get_vectype_for_scalar_type (mask_element_type);
1481 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1482 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1483 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1484 scale = mask_nunits / nunits;
1485 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1487 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1488 unrolling factor. */
1489 orig_vec_stmts_num = group_size *
1490 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1491 if (orig_vec_stmts_num == 1)
1492 only_one_vec = true;
1494 /* Number of copies is determined by the final vectorization factor
1495 relatively to SLP_NODE_INSTANCE unrolling factor. */
1496 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1498 /* Generate permutation masks for every NODE. Number of masks for each NODE
1499 is equal to GROUP_SIZE.
1500 E.g., we have a group of three nodes with three loads from the same
1501 location in each node, and the vector size is 4. I.e., we have a
1502 a0b0c0a1b1c1... sequence and we need to create the following vectors:
1503 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1504 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1507 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1508 scpecific type, e.g., in bytes for Altivec.
1509 The last mask is illegal since we assume two operands for permute
1510 operation, and the mask element values can't be outside that range. Hence,
1511 the last mask must be converted into {2,5,5,5}.
1512 For the first two permutations we need the first and the second input
1513 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1514 we need the second and the third vectors: {b1,c1,a2,b2} and
1515 {c2,a3,b3,c3}. */
1517 for (i = 0;
1518 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1519 i, node);
1520 i++)
1522 scalar_index = 0;
1523 index = 0;
1524 vect_stmts_counter = 0;
1525 vec_index = 0;
1526 first_vec_index = vec_index++;
1527 if (only_one_vec)
1528 second_vec_index = first_vec_index;
1529 else
1530 second_vec_index = vec_index++;
1532 for (j = 0; j < unroll_factor; j++)
1534 for (k = 0; k < group_size; k++)
1536 first_mask_element = (i + j * group_size) * scale;
1537 for (m = 0; m < scale; m++)
1539 if (!vect_get_mask_element (stmt, first_mask_element, m,
1540 mask_nunits, only_one_vec, index, mask,
1541 &current_mask_element, &need_next_vector))
1542 return false;
1544 mask[index++] = current_mask_element;
1547 if (index == mask_nunits)
1549 index = 0;
1550 if (!analyze_only)
1552 if (need_next_vector)
1554 first_vec_index = second_vec_index;
1555 second_vec_index = vec_index;
1558 next_scalar_stmt = VEC_index (gimple,
1559 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1561 vect_create_mask_and_perm (stmt, next_scalar_stmt,
1562 mask, mask_nunits, mask_element_type, mask_type,
1563 first_vec_index, second_vec_index, gsi, node,
1564 builtin_decl, vectype, dr_chain, ncopies,
1565 vect_stmts_counter++);
1572 free (mask);
1573 return true;
1578 /* Vectorize SLP instance tree in postorder. */
1580 static bool
1581 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1582 unsigned int vectorization_factor)
1584 gimple stmt;
1585 bool strided_store, is_store;
1586 gimple_stmt_iterator si;
1587 stmt_vec_info stmt_info;
1588 unsigned int vec_stmts_size, nunits, group_size;
1589 tree vectype;
1590 int i;
1591 slp_tree loads_node;
1593 if (!node)
1594 return false;
1596 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1597 vectorization_factor);
1598 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1599 vectorization_factor);
1601 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1602 stmt_info = vinfo_for_stmt (stmt);
1604 /* VECTYPE is the type of the destination. */
1605 vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1606 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1607 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1609 /* For each SLP instance calculate number of vector stmts to be created
1610 for the scalar stmts in each node of the SLP tree. Number of vector
1611 elements in one vector iteration is the number of scalar elements in
1612 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1613 size. */
1614 vec_stmts_size = (vectorization_factor * group_size) / nunits;
1616 /* In case of load permutation we have to allocate vectorized statements for
1617 all the nodes that participate in that permutation. */
1618 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1620 for (i = 0;
1621 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1622 i++)
1624 if (!SLP_TREE_VEC_STMTS (loads_node))
1626 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
1627 vec_stmts_size);
1628 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
1633 if (!SLP_TREE_VEC_STMTS (node))
1635 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
1636 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
1639 if (vect_print_dump_info (REPORT_DETAILS))
1641 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
1642 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1645 /* Loads should be inserted before the first load. */
1646 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
1647 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
1648 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
1649 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
1650 else
1651 si = gsi_for_stmt (stmt);
1653 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
1654 if (is_store)
1656 if (DR_GROUP_FIRST_DR (stmt_info))
1657 /* If IS_STORE is TRUE, the vectorization of the
1658 interleaving chain was completed - free all the stores in
1659 the chain. */
1660 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
1661 else
1662 /* FORNOW: SLP originates only from strided stores. */
1663 gcc_unreachable ();
1665 return true;
1668 /* FORNOW: SLP originates only from strided stores. */
1669 return false;
1673 bool
1674 vect_schedule_slp (loop_vec_info loop_vinfo)
1676 VEC (slp_instance, heap) *slp_instances =
1677 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1678 slp_instance instance;
1679 unsigned int i;
1680 bool is_store = false;
1682 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1684 /* Schedule the tree of INSTANCE. */
1685 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
1686 instance, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1688 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
1689 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1690 fprintf (vect_dump, "vectorizing stmts using SLP.");
1693 return is_store;