PR other/48093
[official-gcc.git] / gcc / tree-vect-slp.c
blob25eaf3e6f053a3e53393e99a87695d7a626f553e
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "tree-vectorizer.h"
42 /* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
45 LOC
46 find_bb_location (basic_block bb)
48 gimple stmt = NULL;
49 gimple_stmt_iterator si;
51 if (!bb)
52 return UNKNOWN_LOC;
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 stmt = gsi_stmt (si);
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
61 return UNKNOWN_LOC;
65 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67 static void
68 vect_free_slp_tree (slp_tree node)
70 if (!node)
71 return;
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
84 free (node);
88 /* Free the memory allocated for the SLP instance. */
90 void
91 vect_free_slp_instance (slp_instance instance)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
/* NOTE(review): brace/blank lines appear to have been dropped by the web
   extraction of this file, and upstream line numbers are fused into each
   line; compare against upstream gcc/tree-vect-slp.c before relying on the
   block structure shown here.  */
99 /* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
103 static bool
104 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
105 slp_tree slp_node, gimple stmt,
106 VEC (gimple, heap) **def_stmts0,
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
110 tree *first_stmt_def0_type,
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
116 tree oprnd;
117 unsigned int i, number_of_oprnds;
118 tree def;
119 gimple def_stmt;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
121 stmt_vec_info stmt_info =
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
124 struct loop *loop = NULL;
126 if (loop_vinfo)
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
/* RHS operands of STMT live at gimple op positions 1..number_of_oprnds.  */
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
132 for (i = 0; i < number_of_oprnds; i++)
134 oprnd = gimple_op (stmt, i + 1);
136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
137 &dt[i])
138 || (!def_stmt && dt[i] != vect_constant_def))
140 if (vect_print_dump_info (REPORT_SLP))
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
146 return false;
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
151 pattern. */
152 if (loop && def_stmt && gimple_bb (def_stmt)
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
157 if (!*first_stmt_dt0)
158 *pattern0 = true;
159 else
161 if (i == 1 && !*first_stmt_dt1)
162 *pattern1 = true;
163 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
165 if (vect_print_dump_info (REPORT_DETAILS))
167 fprintf (vect_dump, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
172 return false;
176 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
177 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
/* NOTE(review): this tests *dt, i.e. dt[0], even when i == 1 — looks
   suspicious; confirm against upstream whether dt[i] was intended.  */
179 if (*dt == vect_unknown_def_type)
181 if (vect_print_dump_info (REPORT_DETAILS))
182 fprintf (vect_dump, "Unsupported pattern.");
183 return false;
186 switch (gimple_code (def_stmt))
188 case GIMPLE_PHI:
189 def = gimple_phi_result (def_stmt);
190 break;
192 case GIMPLE_ASSIGN:
193 def = gimple_assign_lhs (def_stmt);
194 break;
196 default:
197 if (vect_print_dump_info (REPORT_DETAILS))
198 fprintf (vect_dump, "unsupported defining stmt: ");
199 return false;
/* FIRST_STMT_DT0 still unset means STMT is the first stmt of the group:
   record its operand info for later comparisons.  */
203 if (!*first_stmt_dt0)
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0 = dt[i];
207 if (def)
208 *first_stmt_def0_type = TREE_TYPE (def);
209 else
210 *first_stmt_const_oprnd = oprnd;
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class != GIMPLE_SINGLE_RHS)
214 /* Not memory operation (we don't call this functions for loads). */
215 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
216 else
217 /* Store. */
218 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
219 dt[0], slp_node);
222 else
224 if (!*first_stmt_dt1 && i == 1)
226 /* op1 of the first stmt of the group - store its info. */
227 *first_stmt_dt1 = dt[i];
228 if (def)
229 *first_stmt_def1_type = TREE_TYPE (def);
230 else
232 /* We assume that the stmt contains only one constant
233 operand. We fail otherwise, to be on the safe side. */
234 if (*first_stmt_const_oprnd)
236 if (vect_print_dump_info (REPORT_SLP))
237 fprintf (vect_dump, "Build SLP failed: two constant "
238 "oprnds in stmt");
239 return false;
241 *first_stmt_const_oprnd = oprnd;
244 else
246 /* Not first stmt of the group, check that the def-stmt/s match
247 the def-stmt/s of the first stmt. */
248 if ((i == 0
249 && (*first_stmt_dt0 != dt[i]
250 || (*first_stmt_def0_type && def
251 && !types_compatible_p (*first_stmt_def0_type,
252 TREE_TYPE (def)))))
253 || (i == 1
254 && (*first_stmt_dt1 != dt[i]
255 || (*first_stmt_def1_type && def
256 && !types_compatible_p (*first_stmt_def1_type,
257 TREE_TYPE (def)))))
258 || (!def
259 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
260 TREE_TYPE (oprnd))))
262 if (vect_print_dump_info (REPORT_SLP))
263 fprintf (vect_dump, "Build SLP failed: different types ");
265 return false;
270 /* Check the types of the definitions. */
271 switch (dt[i])
273 case vect_constant_def:
274 case vect_external_def:
/* Constants/externals need no def-stmt collection.  */
275 break;
277 case vect_internal_def:
278 case vect_reduction_def:
279 if (i == 0)
280 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
281 else
282 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
283 break;
285 default:
286 /* FORNOW: Not supported. */
287 if (vect_print_dump_info (REPORT_SLP))
289 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
290 print_generic_expr (vect_dump, def, TDF_SLIM);
293 return false;
297 return true;
/* NOTE(review): brace/blank lines appear to have been dropped by the web
   extraction, and upstream line numbers are fused into each line; compare
   against upstream gcc/tree-vect-slp.c before relying on the structure.  */
301 /* Recursively build an SLP tree starting from NODE.
302 Fail (and return FALSE) if def-stmts are not isomorphic, require data
303 permutation or are of unsupported types of operation. Otherwise, return
304 TRUE. */
306 static bool
307 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
308 slp_tree *node, unsigned int group_size,
309 int *inside_cost, int *outside_cost,
310 int ncopies_for_cost, unsigned int *max_nunits,
311 VEC (int, heap) **load_permutation,
312 VEC (slp_tree, heap) **loads,
313 unsigned int vectorization_factor)
315 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
316 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
317 unsigned int i;
318 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
319 gimple stmt = VEC_index (gimple, stmts, 0);
320 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
321 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
322 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
323 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
324 tree lhs;
325 bool stop_recursion = false, need_same_oprnds = false;
326 tree vectype, scalar_type, first_op1 = NULL_TREE;
327 unsigned int ncopies;
328 optab optab;
329 int icode;
330 enum machine_mode optab_op2_mode;
331 enum machine_mode vec_mode;
332 tree first_stmt_const_oprnd = NULL_TREE;
333 struct data_reference *first_dr;
334 bool pattern0 = false, pattern1 = false;
335 HOST_WIDE_INT dummy;
336 bool permutation = false;
337 unsigned int load_place;
338 gimple first_load, prev_first_load = NULL;
340 /* For every stmt in NODE find its def stmt/s. */
341 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
343 if (vect_print_dump_info (REPORT_SLP))
345 fprintf (vect_dump, "Build SLP for ");
346 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
349 /* Fail to vectorize statements marked as unvectorizable. */
350 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
352 if (vect_print_dump_info (REPORT_SLP))
354 fprintf (vect_dump,
355 "Build SLP failed: unvectorizable statement ");
356 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
359 return false;
362 lhs = gimple_get_lhs (stmt);
363 if (lhs == NULL_TREE)
365 if (vect_print_dump_info (REPORT_SLP))
367 fprintf (vect_dump,
368 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
369 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
372 return false;
375 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
376 vectype = get_vectype_for_scalar_type (scalar_type);
377 if (!vectype)
379 if (vect_print_dump_info (REPORT_SLP))
381 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
382 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
384 return false;
387 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
388 if (ncopies != 1)
390 if (vect_print_dump_info (REPORT_SLP))
391 fprintf (vect_dump, "SLP with multiple types ");
393 /* FORNOW: multiple types are unsupported in BB SLP. */
394 if (bb_vinfo)
395 return false;
398 /* In case of multiple types we need to detect the smallest type. */
399 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
400 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
402 if (is_gimple_call (stmt))
403 rhs_code = CALL_EXPR;
404 else
405 rhs_code = gimple_assign_rhs_code (stmt);
407 /* Check the operation. */
/* The first stmt fixes the operation code and (for shifts with a scalar
   shift amount) the required common second operand; later stmts are
   compared against it.  */
408 if (i == 0)
410 first_stmt_code = rhs_code;
412 /* Shift arguments should be equal in all the packed stmts for a
413 vector shift with scalar shift operand. */
414 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
415 || rhs_code == LROTATE_EXPR
416 || rhs_code == RROTATE_EXPR)
418 vec_mode = TYPE_MODE (vectype);
420 /* First see if we have a vector/vector shift. */
421 optab = optab_for_tree_code (rhs_code, vectype,
422 optab_vector);
424 if (!optab
425 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
427 /* No vector/vector shift, try for a vector/scalar shift. */
428 optab = optab_for_tree_code (rhs_code, vectype,
429 optab_scalar);
431 if (!optab)
433 if (vect_print_dump_info (REPORT_SLP))
434 fprintf (vect_dump, "Build SLP failed: no optab.");
435 return false;
437 icode = (int) optab_handler (optab, vec_mode);
438 if (icode == CODE_FOR_nothing)
440 if (vect_print_dump_info (REPORT_SLP))
441 fprintf (vect_dump, "Build SLP failed: "
442 "op not supported by target.");
443 return false;
445 optab_op2_mode = insn_data[icode].operand[2].mode;
446 if (!VECTOR_MODE_P (optab_op2_mode))
448 need_same_oprnds = true;
449 first_op1 = gimple_assign_rhs2 (stmt);
454 else
/* Not the first stmt: the operation must match the first stmt's,
   except REALPART/IMAGPART mixes and strided memory references.  */
456 if (first_stmt_code != rhs_code
457 && (first_stmt_code != IMAGPART_EXPR
458 || rhs_code != REALPART_EXPR)
459 && (first_stmt_code != REALPART_EXPR
460 || rhs_code != IMAGPART_EXPR)
461 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
462 && (first_stmt_code == ARRAY_REF
463 || first_stmt_code == INDIRECT_REF
464 || first_stmt_code == COMPONENT_REF
465 || first_stmt_code == MEM_REF)))
467 if (vect_print_dump_info (REPORT_SLP))
469 fprintf (vect_dump,
470 "Build SLP failed: different operation in stmt ");
471 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
474 return false;
477 if (need_same_oprnds
478 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
480 if (vect_print_dump_info (REPORT_SLP))
482 fprintf (vect_dump,
483 "Build SLP failed: different shift arguments in ");
484 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
487 return false;
491 /* Strided store or load. */
492 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
494 if (REFERENCE_CLASS_P (lhs))
496 /* Store. */
497 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
498 stmt, &def_stmts0, &def_stmts1,
499 &first_stmt_dt0,
500 &first_stmt_dt1,
501 &first_stmt_def0_type,
502 &first_stmt_def1_type,
503 &first_stmt_const_oprnd,
504 ncopies_for_cost,
505 &pattern0, &pattern1))
506 return false;
508 else
510 /* Load. */
511 /* FORNOW: Check that there is no gap between the loads. */
512 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
513 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
514 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
515 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
517 if (vect_print_dump_info (REPORT_SLP))
519 fprintf (vect_dump, "Build SLP failed: strided "
520 "loads have gaps ");
521 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
524 return false;
527 /* Check that the size of interleaved loads group is not
528 greater than the SLP group size. */
529 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
531 if (vect_print_dump_info (REPORT_SLP))
533 fprintf (vect_dump, "Build SLP failed: the number of "
534 "interleaved loads is greater than"
535 " the SLP group size ");
536 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
539 return false;
542 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
543 if (prev_first_load)
545 /* Check that there are no loads from different interleaving
546 chains in the same node. The only exception is complex
547 numbers. */
548 if (prev_first_load != first_load
549 && rhs_code != REALPART_EXPR
550 && rhs_code != IMAGPART_EXPR)
552 if (vect_print_dump_info (REPORT_SLP))
554 fprintf (vect_dump, "Build SLP failed: different "
555 "interleaving chains in one node ");
556 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
559 return false;
562 else
563 prev_first_load = first_load;
565 if (first_load == stmt)
567 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
568 if (vect_supportable_dr_alignment (first_dr, false)
569 == dr_unaligned_unsupported)
571 if (vect_print_dump_info (REPORT_SLP))
573 fprintf (vect_dump, "Build SLP failed: unsupported "
574 "unaligned load ");
575 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
578 return false;
581 /* Analyze costs (for the first stmt in the group). */
582 vect_model_load_cost (vinfo_for_stmt (stmt),
583 ncopies_for_cost, false, *node);
586 /* Store the place of this load in the interleaving chain. In
587 case that permutation is needed we later decide if a specific
588 permutation is supported. */
589 load_place = vect_get_place_in_interleaving_chain (stmt,
590 first_load);
591 if (load_place != i)
592 permutation = true;
594 VEC_safe_push (int, heap, *load_permutation, load_place);
596 /* We stop the tree when we reach a group of loads. */
597 stop_recursion = true;
598 continue;
600 } /* Strided access. */
601 else
603 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
605 /* Not strided load. */
606 if (vect_print_dump_info (REPORT_SLP))
608 fprintf (vect_dump, "Build SLP failed: not strided load ");
609 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
612 /* FORNOW: Not strided loads are not supported. */
613 return false;
616 /* Not memory operation. */
617 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
618 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
620 if (vect_print_dump_info (REPORT_SLP))
622 fprintf (vect_dump, "Build SLP failed: operation");
623 fprintf (vect_dump, " unsupported ");
624 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
627 return false;
630 /* Find the def-stmts. */
631 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
632 &def_stmts0, &def_stmts1,
633 &first_stmt_dt0, &first_stmt_dt1,
634 &first_stmt_def0_type,
635 &first_stmt_def1_type,
636 &first_stmt_const_oprnd,
637 ncopies_for_cost,
638 &pattern0, &pattern1))
639 return false;
643 /* Add the costs of the node to the overall instance costs. */
644 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
645 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
647 /* Strided loads were reached - stop the recursion. */
648 if (stop_recursion)
650 if (permutation)
652 VEC_safe_push (slp_tree, heap, *loads, *node);
653 *inside_cost
654 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
655 * group_size;
657 else
659 /* We don't check here complex numbers chains, so we keep them in
660 LOADS for further check in vect_supported_load_permutation_p. */
661 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
662 VEC_safe_push (slp_tree, heap, *loads, *node);
665 return true;
668 /* Create SLP_TREE nodes for the definition node/s. */
/* Recurse on the collected left/right def-stmts only when they are
   internal defs; constants/externals need no child node.  */
669 if (first_stmt_dt0 == vect_internal_def)
671 slp_tree left_node = XNEW (struct _slp_tree);
672 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
673 SLP_TREE_VEC_STMTS (left_node) = NULL;
674 SLP_TREE_LEFT (left_node) = NULL;
675 SLP_TREE_RIGHT (left_node) = NULL;
676 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
677 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
678 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
679 inside_cost, outside_cost, ncopies_for_cost,
680 max_nunits, load_permutation, loads,
681 vectorization_factor))
682 return false;
684 SLP_TREE_LEFT (*node) = left_node;
687 if (first_stmt_dt1 == vect_internal_def)
689 slp_tree right_node = XNEW (struct _slp_tree);
690 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
691 SLP_TREE_VEC_STMTS (right_node) = NULL;
692 SLP_TREE_LEFT (right_node) = NULL;
693 SLP_TREE_RIGHT (right_node) = NULL;
694 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
695 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
696 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
697 inside_cost, outside_cost, ncopies_for_cost,
698 max_nunits, load_permutation, loads,
699 vectorization_factor))
700 return false;
702 SLP_TREE_RIGHT (*node) = right_node;
705 return true;
709 static void
710 vect_print_slp_tree (slp_tree node)
712 int i;
713 gimple stmt;
715 if (!node)
716 return;
718 fprintf (vect_dump, "node ");
719 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
721 fprintf (vect_dump, "\n\tstmt %d ", i);
722 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
724 fprintf (vect_dump, "\n");
726 vect_print_slp_tree (SLP_TREE_LEFT (node));
727 vect_print_slp_tree (SLP_TREE_RIGHT (node));
731 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
732 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
733 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
734 stmts in NODE are to be marked. */
736 static void
737 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
739 int i;
740 gimple stmt;
742 if (!node)
743 return;
745 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
746 if (j < 0 || i == j)
747 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
749 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
750 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
754 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
756 static void
757 vect_mark_slp_stmts_relevant (slp_tree node)
759 int i;
760 gimple stmt;
761 stmt_vec_info stmt_info;
763 if (!node)
764 return;
766 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
768 stmt_info = vinfo_for_stmt (stmt);
769 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
770 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
771 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
774 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
775 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
/* NOTE(review): brace/blank lines appear to have been dropped by the web
   extraction; compare against upstream gcc/tree-vect-slp.c before relying
   on the block structure shown here.  */
779 /* Check if the permutation required by the SLP INSTANCE is supported.
780 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
782 static bool
783 vect_supported_slp_permutation_p (slp_instance instance)
785 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
786 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
787 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
788 VEC (slp_tree, heap) *sorted_loads = NULL;
789 int index;
790 slp_tree *tmp_loads = NULL;
791 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
792 slp_tree load;
794 /* FORNOW: The only supported loads permutation is loads from the same
795 location in all the loads in the node, when the data-refs in
796 nodes of LOADS constitute an interleaving chain.
797 Sort the nodes according to the order of accesses in the chain. */
798 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
/* Walk the permutation in strides of GROUP_SIZE (one index per load node,
   I) in lock-step with the load nodes themselves (J).  */
799 for (i = 0, j = 0;
800 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
801 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
802 i += group_size, j++)
804 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
805 /* Check that the loads are all in the same interleaving chain. */
806 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
808 if (vect_print_dump_info (REPORT_DETAILS))
810 fprintf (vect_dump, "Build SLP failed: unsupported data "
811 "permutation ");
812 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM)
815 free (tmp_loads);
816 return false;
/* Place each load node at its position in the interleaving chain.  */
819 tmp_loads[index] = load;
822 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
823 for (i = 0; i < group_size; i++)
824 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
826 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
827 SLP_INSTANCE_LOADS (instance) = sorted_loads;
828 free (tmp_loads);
/* Final check: ask the target whether this permuted load can actually be
   code-generated (analysis-only call, last argument TRUE).  */
830 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
831 SLP_INSTANCE_UNROLLING_FACTOR (instance),
832 instance, true))
833 return false;
835 return true;
839 /* Rearrange the statements of NODE according to PERMUTATION. */
841 static void
842 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
843 VEC (int, heap) *permutation)
845 gimple stmt;
846 VEC (gimple, heap) *tmp_stmts;
847 unsigned int index, i;
849 if (!node)
850 return;
852 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
853 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
855 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
856 tmp_stmts = VEC_alloc (gimple, heap, group_size);
858 for (i = 0; i < group_size; i++)
859 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
861 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
863 index = VEC_index (int, permutation, i);
864 VEC_replace (gimple, tmp_stmts, index, stmt);
867 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
868 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
/* NOTE(review): brace/blank lines appear to have been dropped by the web
   extraction; compare against upstream gcc/tree-vect-slp.c before relying
   on the block structure shown here.  */
872 /* Check if the required load permutation is supported.
873 LOAD_PERMUTATION contains a list of indices of the loads.
874 In SLP this permutation is relative to the order of strided stores that are
875 the base of the SLP instance. */
877 static bool
878 vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
879 VEC (int, heap) *load_permutation)
881 int i = 0, j, prev = -1, next, k, number_of_groups;
882 bool supported, bad_permutation = false;
883 sbitmap load_index;
884 slp_tree node, other_complex_node;
885 gimple stmt, first = NULL, other_node_first;
886 unsigned complex_numbers = 0;
888 /* FORNOW: permutations are only supported in SLP. */
889 if (!slp_instn)
890 return false;
892 if (vect_print_dump_info (REPORT_SLP))
894 fprintf (vect_dump, "Load permutation ");
895 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
896 fprintf (vect_dump, "%d ", next);
899 /* In case of reduction every load permutation is allowed, since the order
900 of the reduction statements is not important (as opposed to the case of
901 strided stores). The only condition we need to check is that all the
902 load nodes are of the same size and have the same permutation (and then
903 rearrange all the nodes of the SLP instance according to this
904 permutation). */
906 /* Check that all the load nodes are of the same size. */
907 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
909 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
910 != (unsigned) group_size)
911 return false;
913 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
/* Count load nodes whose first stmt extracts a complex component; this
   drives the complex-swap special case below.  */
914 if (is_gimple_assign (stmt)
915 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
916 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
917 complex_numbers++;
920 /* Complex operands can be swapped as following:
921 real_c = real_b + real_a;
922 imag_c = imag_a + imag_b;
923 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
924 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
925 chains are mixed, they match the above pattern. */
926 if (complex_numbers)
928 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
930 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
932 if (j == 0)
933 first = stmt;
934 else
936 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
938 if (complex_numbers != 2)
939 return false;
/* Pick the other of the two load nodes for the cross-check.  */
941 if (i == 0)
942 k = 1;
943 else
944 k = 0;
946 other_complex_node = VEC_index (slp_tree,
947 SLP_INSTANCE_LOADS (slp_instn), k);
948 other_node_first = VEC_index (gimple,
949 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
951 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))
952 != other_node_first)
953 return false;
960 /* We checked that this case ok, so there is no need to proceed with
961 permutation tests. */
962 if (complex_numbers == 2)
964 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
965 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
966 return true;
969 node = SLP_INSTANCE_TREE (slp_instn);
970 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
971 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
972 instance, not all the loads belong to the same node or interleaving
973 group. Hence, we need to divide them into groups according to
974 GROUP_SIZE. */
975 number_of_groups = VEC_length (int, load_permutation) / group_size;
977 /* Reduction (there are no data-refs in the root). */
978 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
980 int first_group_load_index;
982 /* Compare all the permutation sequences to the first one. */
983 for (i = 1; i < number_of_groups; i++)
985 k = 0;
986 for (j = i * group_size; j < i * group_size + group_size; j++)
988 next = VEC_index (int, load_permutation, j);
989 first_group_load_index = VEC_index (int, load_permutation, k);
991 if (next != first_group_load_index)
993 bad_permutation = true;
994 break;
997 k++;
1000 if (bad_permutation)
1001 break;
1004 if (!bad_permutation)
1006 /* Check that the loads in the first sequence are different and there
1007 are no gaps between them. */
1008 load_index = sbitmap_alloc (group_size);
1009 sbitmap_zero (load_index);
1010 for (k = 0; k < group_size; k++)
1012 first_group_load_index = VEC_index (int, load_permutation, k);
1013 if (TEST_BIT (load_index, first_group_load_index))
1015 bad_permutation = true;
1016 break;
1019 SET_BIT (load_index, first_group_load_index);
1022 if (!bad_permutation)
1023 for (k = 0; k < group_size; k++)
1024 if (!TEST_BIT (load_index, k))
1026 bad_permutation = true;
1027 break;
1030 sbitmap_free (load_index);
1033 if (!bad_permutation)
1035 /* This permutation is valid for reduction. Since the order of the
1036 statements in the nodes is not important unless they are memory
1037 accesses, we can rearrange the statements in all the nodes
1038 according to the order of the loads. */
1039 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1040 load_permutation);
1041 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1042 return true;
1046 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1047 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1048 well (unless it's reduction). */
1049 if (VEC_length (int, load_permutation)
1050 != (unsigned int) (group_size * group_size))
1051 return false;
1053 supported = true;
1054 load_index = sbitmap_alloc (group_size);
1055 sbitmap_zero (load_index);
/* Each run of GROUP_SIZE permutation entries must repeat one index, and
   each index may be used by exactly one run (tracked in LOAD_INDEX).  */
1056 for (j = 0; j < group_size; j++)
1058 for (i = j * group_size, k = 0;
1059 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1060 i++, k++)
1062 if (i != j * group_size && next != prev)
1064 supported = false;
1065 break;
1068 prev = next;
1071 if (TEST_BIT (load_index, prev))
1073 supported = false;
1074 break;
1077 SET_BIT (load_index, prev);
1080 for (j = 0; j < group_size; j++)
1081 if (!TEST_BIT (load_index, j))
/* NOTE(review): this early return skips sbitmap_free (load_index);
   looks like a leak on this path — confirm against upstream.  */
1082 return false;
1084 sbitmap_free (load_index);
1086 if (supported && i == group_size * group_size
1087 && vect_supported_slp_permutation_p (slp_instn))
1088 return true;
1090 return false;
1094 /* Find the first load in the loop that belongs to INSTANCE.
1095 When loads are in several SLP nodes, there can be a case in which the first
1096 load does not appear in the first SLP node to be transformed, causing
1097 incorrect order of statements. Since we generate all the loads together,
1098 they must be inserted before the first load of the SLP instance and not
1099 before the first load of the first node of the instance. */
1101 static gimple
1102 vect_find_first_load_in_slp_instance (slp_instance instance)
1104 int i, j;
1105 slp_tree load_node;
1106 gimple first_load = NULL, load;
1108 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1109 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
1110 first_load = get_earlier_stmt (load, first_load);
1112 return first_load;
1116 /* Find the last store in SLP INSTANCE. */
1118 static gimple
1119 vect_find_last_store_in_slp_instance (slp_instance instance)
1121 int i;
1122 slp_tree node;
1123 gimple last_store = NULL, store;
1125 node = SLP_INSTANCE_TREE (instance);
1126 for (i = 0;
1127 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1128 i++)
1129 last_store = get_later_stmt (store, last_store);
1131 return last_store;
1135 /* Analyze an SLP instance starting from a group of strided stores. Call
1136 vect_build_slp_tree to build a tree of packed stmts if possible.
1137 Return FALSE if it's impossible to SLP any stmt in the loop. */
1139 static bool
1140 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1141 gimple stmt)
1143 slp_instance new_instance;
1144 slp_tree node = XNEW (struct _slp_tree);
1145 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1146 unsigned int unrolling_factor = 1, nunits;
1147 tree vectype, scalar_type = NULL_TREE;
1148 gimple next;
1149 unsigned int vectorization_factor = 0;
1150 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
1151 unsigned int max_nunits = 0;
1152 VEC (int, heap) *load_permutation;
1153 VEC (slp_tree, heap) *loads;
1154 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
/* STMT is either the first store of a strided group (DR is set) or the
   first statement of a group of reductions (no DR; loop SLP only).  The
   group size and vector type are derived accordingly.  */
1156 if (dr)
1158 scalar_type = TREE_TYPE (DR_REF (dr));
1159 vectype = get_vectype_for_scalar_type (scalar_type);
1160 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1162 else
1164 gcc_assert (loop_vinfo);
1165 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1166 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1169 if (!vectype)
1171 if (vect_print_dump_info (REPORT_SLP))
1173 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1174 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1177 return false;
1180 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1181 if (loop_vinfo)
1182 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1183 else
1184 /* No multitypes in BB SLP. */
1185 vectorization_factor = nunits;
1187 /* Calculate the unrolling factor. */
1188 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
/* Basic-block SLP has no loop to unroll, so any required unrolling
   makes the group unvectorizable here.  */
1189 if (unrolling_factor != 1 && !loop_vinfo)
1191 if (vect_print_dump_info (REPORT_SLP))
1192 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1193 " block SLP");
1195 return false;
1198 /* Create a node (a root of the SLP tree) for the packed strided stores. */
1199 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1200 next = stmt;
1201 if (dr)
1203 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1204 while (next)
1206 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1207 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1210 else
1212 /* Collect reduction statements. */
1213 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1214 next);
1215 i++)
1217 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1218 if (vect_print_dump_info (REPORT_DETAILS))
1220 fprintf (vect_dump, "pushing reduction into node: ");
1221 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
/* Initialize the remaining root-node fields before building the tree.  */
1226 SLP_TREE_VEC_STMTS (node) = NULL;
1227 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1228 SLP_TREE_LEFT (node) = NULL;
1229 SLP_TREE_RIGHT (node) = NULL;
1230 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1231 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1233 /* Calculate the number of vector stmts to create based on the unrolling
1234 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1235 GROUP_SIZE / NUNITS otherwise. */
1236 ncopies_for_cost = unrolling_factor * group_size / nunits;
1238 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1239 loads = VEC_alloc (slp_tree, heap, group_size);
1241 /* Build the tree for the SLP instance. */
1242 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1243 &inside_cost, &outside_cost, ncopies_for_cost,
1244 &max_nunits, &load_permutation, &loads,
1245 vectorization_factor))
1247 /* Create a new SLP instance. */
1248 new_instance = XNEW (struct _slp_instance);
1249 SLP_INSTANCE_TREE (new_instance) = node;
1250 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1251 /* Calculate the unrolling factor based on the smallest type in the
1252 loop. */
1253 if (max_nunits > nunits)
1254 unrolling_factor = least_common_multiple (max_nunits, group_size)
1255 / group_size;
1257 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1258 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1259 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1260 SLP_INSTANCE_LOADS (new_instance) = loads;
1261 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1262 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
/* If the tree contains loads, the load permutation must be supported;
   otherwise the whole instance is abandoned and freed.  */
1263 if (VEC_length (slp_tree, loads))
1265 if (!vect_supported_load_permutation_p (new_instance, group_size,
1266 load_permutation))
1268 if (vect_print_dump_info (REPORT_SLP))
1270 fprintf (vect_dump, "Build SLP failed: unsupported load "
1271 "permutation ");
1272 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1275 vect_free_slp_instance (new_instance);
1276 return false;
1279 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1280 = vect_find_first_load_in_slp_instance (new_instance);
1282 else
1283 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
/* Record the instance on the containing loop or basic block.  */
1285 if (loop_vinfo)
1286 VEC_safe_push (slp_instance, heap,
1287 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1288 new_instance);
1289 else
1290 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1291 new_instance);
1293 if (vect_print_dump_info (REPORT_SLP))
1294 vect_print_slp_tree (node);
1296 return true;
1299 /* Failed to SLP. */
1300 /* Free the allocated memory. */
1301 vect_free_slp_tree (node);
1302 VEC_free (int, heap, load_permutation);
1303 VEC_free (slp_tree, heap, loads);
1305 return false;
1309 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1310 trees of packed scalar stmts if SLP is possible. */
1312 bool
1313 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1315 unsigned int i;
1316 VEC (gimple, heap) *strided_stores, *reductions = NULL;
1317 gimple store;
1318 bool ok = false;
1320 if (vect_print_dump_info (REPORT_SLP))
1321 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Exactly one of LOOP_VINFO / BB_VINFO is expected; reductions only
   exist in the loop case.  */
1323 if (loop_vinfo)
1325 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1326 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1328 else
1329 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1331 /* Find SLP sequences starting from groups of strided stores. */
1332 FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
1333 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1334 ok = true;
/* For basic-block SLP, failure to build any instance means the block
   cannot be vectorized at all.  */
1336 if (bb_vinfo && !ok)
1338 if (vect_print_dump_info (REPORT_SLP))
1339 fprintf (vect_dump, "Failed to SLP the basic block.");
1341 return false;
1344 /* Find SLP sequences starting from groups of reductions. */
1345 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
1346 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1347 VEC_index (gimple, reductions, 0)))
1348 ok = true;
/* NOTE(review): OK is set above but the loop case always returns TRUE
   here regardless — loop SLP analysis is best-effort and never fails the
   overall analysis; confirm this is intentional.  */
1350 return true;
1354 /* For each possible SLP instance decide whether to SLP it and calculate overall
1355 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1356 least one instance. */
1358 bool
1359 vect_make_slp_decision (loop_vec_info loop_vinfo)
1361 unsigned int i, unrolling_factor = 1;
1362 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1363 slp_instance instance;
1364 int decided_to_slp = 0;
1366 if (vect_print_dump_info (REPORT_SLP))
1367 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1369 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1371 /* FORNOW: SLP if you can. */
1372 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1373 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1375 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1376 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1377 loop-based vectorization. Such stmts will be marked as HYBRID. */
1378 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1379 decided_to_slp++;
1382 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1384 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1385 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1386 decided_to_slp, unrolling_factor);
1388 return (decided_to_slp > 0);
1392 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1393 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1395 static void
1396 vect_detect_hybrid_slp_stmts (slp_tree node)
1398 int i;
1399 gimple stmt;
1400 imm_use_iterator imm_iter;
1401 gimple use_stmt;
1402 stmt_vec_info stmt_vinfo;
1404 if (!node)
1405 return;
/* A pure-SLP stmt whose SSA result is used by a relevant non-SLP stmt
   (or by a vectorizable cycle def) must also be vectorized loop-wise;
   reduction PHI uses are exempt.  Mark the whole node hybrid starting at
   element I.  */
1407 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1408 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1409 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1410 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1411 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1412 && !STMT_SLP_TYPE (stmt_vinfo)
1413 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1414 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1415 && !(gimple_code (use_stmt) == GIMPLE_PHI
1416 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1417 == vect_reduction_def))
1418 vect_mark_slp_stmts (node, hybrid, i);
/* Recurse into both children of the SLP tree.  */
1420 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1421 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1425 /* Find stmts that must be both vectorized and SLPed. */
1427 void
1428 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1430 unsigned int i;
1431 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1432 slp_instance instance;
1434 if (vect_print_dump_info (REPORT_SLP))
1435 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1437 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1438 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1442 /* Create and initialize a new bb_vec_info struct for BB, as well as
1443 stmt_vec_info structs for all the stmts in it. */
1445 static bb_vec_info
1446 new_bb_vec_info (basic_block bb)
1448 bb_vec_info res = NULL;
1449 gimple_stmt_iterator gsi;
1451 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1452 BB_VINFO_BB (res) = bb;
1454 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1456 gimple stmt = gsi_stmt (gsi);
1457 gimple_set_uid (stmt, 0);
1458 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1461 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1462 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1464 bb->aux = res;
1465 return res;
1469 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1470 stmts in the basic block. */
1472 static void
1473 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1475 basic_block bb;
1476 gimple_stmt_iterator si;
1478 if (!bb_vinfo)
1479 return;
1481 bb = BB_VINFO_BB (bb_vinfo);
1483 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1485 gimple stmt = gsi_stmt (si);
1486 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1488 if (stmt_info)
1489 /* Free stmt_vec_info. */
1490 free_stmt_vec_info (stmt);
1493 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1494 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1495 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1496 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1497 free (bb_vinfo);
1498 bb->aux = NULL;
1502 /* Analyze statements contained in SLP tree node after recursively analyzing
1503 the subtree. Return TRUE if the operations are supported. */
1505 static bool
1506 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1508 bool dummy;
1509 int i;
1510 gimple stmt;
1512 if (!node)
1513 return true;
1515 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1516 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1517 return false;
1519 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1521 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1522 gcc_assert (stmt_info);
1523 gcc_assert (PURE_SLP_STMT (stmt_info));
1525 if (!vect_analyze_stmt (stmt, &dummy, node))
1526 return false;
1529 return true;
1533 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1534 operations are supported. */
1536 static bool
1537 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1539 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1540 slp_instance instance;
1541 int i;
1543 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1545 if (!vect_slp_analyze_node_operations (bb_vinfo,
1546 SLP_INSTANCE_TREE (instance)))
1548 vect_free_slp_instance (instance);
1549 VEC_ordered_remove (slp_instance, slp_instances, i);
1551 else
1552 i++;
1555 if (!VEC_length (slp_instance, slp_instances))
1556 return false;
1558 return true;
1561 /* Check if loads and stores are mixed in the basic block (in that
1562 case if we are not sure that the accesses differ, we can't vectorize the
1563 basic block). Also return FALSE in case that there is statement marked as
1564 not vectorizable. */
1566 static bool
1567 vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
1569 basic_block bb = BB_VINFO_BB (bb_vinfo);
1570 gimple_stmt_iterator si;
1571 bool detected_store = false;
1572 gimple stmt;
1573 struct data_reference *dr;
1575 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1577 stmt = gsi_stmt (si);
1579 /* We can't allow not analyzed statements, since they may contain data
1580 accesses. */
1581 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
1582 return false;
1584 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1585 continue;
1587 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1588 if (DR_IS_READ (dr) && detected_store)
1589 return false;
1591 if (!DR_IS_READ (dr))
1592 detected_store = true;
1595 return true;
1598 /* Check if vectorization of the basic block is profitable. */
1600 static bool
1601 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1603 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1604 slp_instance instance;
1605 int i;
1606 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1607 unsigned int stmt_cost;
1608 gimple stmt;
1609 gimple_stmt_iterator si;
1610 basic_block bb = BB_VINFO_BB (bb_vinfo);
1611 stmt_vec_info stmt_info = NULL;
1612 tree dummy_type = NULL;
1613 int dummy = 0;
1615 /* Calculate vector costs. */
1616 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1618 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1619 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1622 /* Calculate scalar cost. */
/* Only statements that would actually be replaced by vector code
   (vectorizable pure-SLP stmts) contribute to the scalar cost.  */
1623 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1625 stmt = gsi_stmt (si);
1626 stmt_info = vinfo_for_stmt (stmt);
1628 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1629 || !PURE_SLP_STMT (stmt_info))
1630 continue;
/* Query the target cost hook per statement kind: load, store, or
   generic scalar statement.  */
1632 if (STMT_VINFO_DATA_REF (stmt_info))
1634 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1635 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1636 (scalar_load, dummy_type, dummy);
1637 else
1638 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1639 (scalar_store, dummy_type, dummy);
1641 else
1642 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1643 (scalar_stmt, dummy_type, dummy);
1645 scalar_cost += stmt_cost;
1648 if (vect_print_dump_info (REPORT_COST))
1650 fprintf (vect_dump, "Cost model analysis: \n");
1651 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1652 vec_inside_cost);
1653 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1654 vec_outside_cost);
1655 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1658 /* Vectorization is profitable if its cost is less than the cost of scalar
1659 version. */
1660 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1661 return false;
1663 return true;
1666 /* Check if the basic block can be vectorized. */
/* Returns a fully analyzed bb_vec_info on success, NULL on failure; on
   failure every intermediate structure is destroyed.  The analysis
   pipeline is: size check, data-ref analysis, dependences, alignment,
   accesses, SLP-tree construction, operation analysis, cost model.  */
1668 bb_vec_info
1669 vect_slp_analyze_bb (basic_block bb)
1671 bb_vec_info bb_vinfo;
1672 VEC (ddr_p, heap) *ddrs;
1673 VEC (slp_instance, heap) *slp_instances;
1674 slp_instance instance;
1675 int i, insns = 0;
1676 gimple_stmt_iterator gsi;
1677 int min_vf = 2;
1678 int max_vf = MAX_VECTORIZATION_FACTOR;
1679 bool data_dependence_in_bb = false;
1681 current_vector_size = 0;
1683 if (vect_print_dump_info (REPORT_DETAILS))
1684 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
/* Count real instructions (ignoring debug stmts, nops and labels) to
   bound analysis time on huge blocks.  */
1686 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1688 gimple stmt = gsi_stmt (gsi);
1689 if (!is_gimple_debug (stmt)
1690 && !gimple_nop_p (stmt)
1691 && gimple_code (stmt) != GIMPLE_LABEL)
1692 insns++;
1695 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1697 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1698 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1699 "block.\n");
1701 return NULL;
1704 bb_vinfo = new_bb_vec_info (bb);
1705 if (!bb_vinfo)
1706 return NULL;
1708 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1710 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1711 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1712 "block.\n");
1714 destroy_bb_vec_info (bb_vinfo);
1715 return NULL;
1718 ddrs = BB_VINFO_DDRS (bb_vinfo);
1719 if (!VEC_length (ddr_p, ddrs))
1721 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1722 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1723 "block.\n");
1725 destroy_bb_vec_info (bb_vinfo);
1726 return NULL;
/* Dependences must either be absent or proven harmless by
   vect_bb_vectorizable_with_dependencies; MIN_VF/MAX_VF come from the
   data-ref and dependence analyses respectively.  */
1729 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
1730 &data_dependence_in_bb)
1731 || min_vf > max_vf
1732 || (data_dependence_in_bb
1733 && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
1735 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1736 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1737 "in basic block.\n");
1739 destroy_bb_vec_info (bb_vinfo);
1740 return NULL;
1743 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1745 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1746 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1747 "block.\n");
1749 destroy_bb_vec_info (bb_vinfo);
1750 return NULL;
1753 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1755 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1756 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1757 "block.\n");
1759 destroy_bb_vec_info (bb_vinfo);
1760 return NULL;
1763 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1765 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1766 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1767 "block.\n");
1769 destroy_bb_vec_info (bb_vinfo);
1770 return NULL;
1773 /* Check the SLP opportunities in the basic block, analyze and build SLP
1774 trees. */
1775 if (!vect_analyze_slp (NULL, bb_vinfo))
1777 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1778 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1779 "in basic block.\n");
1781 destroy_bb_vec_info (bb_vinfo);
1782 return NULL;
1785 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1787 /* Mark all the statements that we want to vectorize as pure SLP and
1788 relevant. */
1789 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1791 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1792 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
1795 if (!vect_slp_analyze_operations (bb_vinfo))
1797 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1798 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1800 destroy_bb_vec_info (bb_vinfo);
1801 return NULL;
1804 /* Cost model: check if the vectorization is worthwhile. */
1805 if (flag_vect_cost_model
1806 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1808 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1809 fprintf (vect_dump, "not vectorized: vectorization is not "
1810 "profitable.\n");
1812 destroy_bb_vec_info (bb_vinfo);
1813 return NULL;
1816 if (vect_print_dump_info (REPORT_DETAILS))
1817 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1819 return bb_vinfo;
1823 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1824 the number of created vector stmts depends on the unrolling factor).
1825 However, the actual number of vector stmts for every SLP node depends on
1826 VF which is set later in vect_analyze_operations (). Hence, SLP costs
1827 should be updated. In this function we assume that the inside costs
1828 calculated in vect_model_xxx_cost are linear in ncopies. */
1830 void
1831 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1833 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1834 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1835 slp_instance instance;
1837 if (vect_print_dump_info (REPORT_SLP))
1838 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1840 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1841 /* We assume that costs are linear in ncopies. */
1842 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1843 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1847 /* For constant and loop invariant defs of SLP_NODE this function returns
1848 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1849 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
1850 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1851 REDUC_INDEX is the index of the reduction operand in the statements, unless
1852 it is -1. */
1854 static void
1855 vect_get_constant_vectors (tree op, slp_tree slp_node,
1856 VEC (tree, heap) **vec_oprnds,
1857 unsigned int op_num, unsigned int number_of_vectors,
1858 int reduc_index)
1860 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1861 gimple stmt = VEC_index (gimple, stmts, 0);
1862 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1863 int nunits;
1864 tree vec_cst;
1865 tree t = NULL_TREE;
1866 int j, number_of_places_left_in_vector;
1867 tree vector_type;
1868 tree vop;
1869 int group_size = VEC_length (gimple, stmts);
1870 unsigned int vec_num, i;
1871 int number_of_copies = 1;
1872 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1873 bool constant_p, is_store;
1874 tree neutral_op = NULL;
1875 enum tree_code code = gimple_assign_rhs_code (stmt);
/* For a reduction, the operand of interest is the one at REDUC_INDEX,
   and additional unrolled copies need a neutral element for the
   reduction operation (0 for +, 1 for *, -1 for AND, ...).  */
1877 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1879 if (reduc_index == -1)
1881 VEC_free (tree, heap, *vec_oprnds);
1882 return;
1885 op_num = reduc_index - 1;
1886 op = gimple_op (stmt, reduc_index);
1887 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1888 we need either neutral operands or the original operands. See
1889 get_initial_def_for_reduction() for details. */
1890 switch (code)
1892 case WIDEN_SUM_EXPR:
1893 case DOT_PROD_EXPR:
1894 case PLUS_EXPR:
1895 case MINUS_EXPR:
1896 case BIT_IOR_EXPR:
1897 case BIT_XOR_EXPR:
1898 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1899 neutral_op = build_real (TREE_TYPE (op), dconst0);
1900 else
1901 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1903 break;
1905 case MULT_EXPR:
1906 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1907 neutral_op = build_real (TREE_TYPE (op), dconst1);
1908 else
1909 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1911 break;
1913 case BIT_AND_EXPR:
1914 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1915 break;
1917 default:
1918 neutral_op = NULL;
/* A data ref on the first stmt means this is a store group; the value
   being stored is RHS1.  */
1922 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1924 is_store = true;
1925 op = gimple_assign_rhs1 (stmt);
1927 else
1928 is_store = false;
1930 gcc_assert (op);
1932 if (CONSTANT_CLASS_P (op))
1933 constant_p = true;
1934 else
1935 constant_p = false;
1937 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1938 gcc_assert (vector_type);
1939 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1941 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1942 created vectors. It is greater than 1 if unrolling is performed.
1944 For example, we have two scalar operands, s1 and s2 (e.g., group of
1945 strided accesses of size two), while NUNITS is four (i.e., four scalars
1946 of this type can be packed in a vector). The output vector will contain
1947 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1948 will be 2).
1950 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1951 containing the operands.
1953 For example, NUNITS is four as before, and the group size is 8
1954 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1955 {s5, s6, s7, s8}. */
1957 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
/* Walk the group backwards because each operand is consed onto the
   front of the TREE_LIST T; the finished list is therefore in source
   order.  */
1959 number_of_places_left_in_vector = nunits;
1960 for (j = 0; j < number_of_copies; j++)
1962 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1964 if (is_store)
1965 op = gimple_assign_rhs1 (stmt);
1966 else
1967 op = gimple_op (stmt, op_num + 1);
1969 if (reduc_index != -1)
1971 struct loop *loop = (gimple_bb (stmt))->loop_father;
1972 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1974 gcc_assert (loop);
1975 /* Get the def before the loop. */
1976 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1977 loop_preheader_edge (loop));
1978 if (j != (number_of_copies - 1) && neutral_op)
1979 op = neutral_op;
1982 /* Create 'vect_ = {op0,op1,...,opn}'. */
1983 t = tree_cons (NULL_TREE, op, t);
1985 number_of_places_left_in_vector--;
1987 if (number_of_places_left_in_vector == 0)
1989 number_of_places_left_in_vector = nunits;
/* All-constant elements become a VECTOR_CST; otherwise build a
   CONSTRUCTOR that is materialized by vect_init_vector.  */
1991 if (constant_p)
1992 vec_cst = build_vector (vector_type, t);
1993 else
1994 vec_cst = build_constructor_from_list (vector_type, t);
1995 VEC_quick_push (tree, voprnds,
1996 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1997 t = NULL_TREE;
2002 /* Since the vectors are created in the reverse order, we should invert
2003 them. */
2004 vec_num = VEC_length (tree, voprnds);
2005 for (j = vec_num - 1; j >= 0; j--)
2007 vop = VEC_index (tree, voprnds, j);
2008 VEC_quick_push (tree, *vec_oprnds, vop);
2011 VEC_free (tree, heap, voprnds);
2013 /* In case that VF is greater than the unrolling factor needed for the SLP
2014 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2015 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2016 to replicate the vectors. */
2017 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2019 tree neutral_vec = NULL;
2021 if (neutral_op)
2023 if (!neutral_vec)
2024 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2026 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2028 else
/* No neutral element: replicate the already-created vectors.  */
2030 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2031 VEC_quick_push (tree, *vec_oprnds, vop);
2037 /* Get vectorized definitions from SLP_NODE that contains corresponding
2038 vectorized def-stmts. */
2040 static void
2041 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2043 tree vec_oprnd;
2044 gimple vec_def_stmt;
2045 unsigned int i;
2047 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2049 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2051 gcc_assert (vec_def_stmt);
2052 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2053 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2058 /* Get vectorized definitions for SLP_NODE.
2059 If the scalar definitions are loop invariants or constants, collect them and
2060 call vect_get_constant_vectors() to create vector stmts.
2061 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2062 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
2063 vect_get_slp_vect_defs() to retrieve them.
2064 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
2065 the right node. This is used when the second operand must remain scalar. */
2067 void
2068 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2069 VEC (tree,heap) **vec_oprnds0,
2070 VEC (tree,heap) **vec_oprnds1, int reduc_index)
2072 gimple first_stmt;
2073 enum tree_code code;
2074 int number_of_vects;
2075 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2077 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2078 /* The number of vector defs is determined by the number of vector statements
2079 in the node from which we get those statements. */
2080 if (SLP_TREE_LEFT (slp_node))
2081 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
2082 else
2084 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2085 /* Number of vector stmts was calculated according to LHS in
2086 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
2087 necessary. See vect_get_smallest_scalar_type () for details. */
2088 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2089 &rhs_size_unit);
2090 if (rhs_size_unit != lhs_size_unit)
2092 number_of_vects *= rhs_size_unit;
2093 number_of_vects /= lhs_size_unit;
2097 /* Allocate memory for vectorized defs. */
2098 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects)
2100 /* SLP_NODE corresponds either to a group of stores or to a group of
2101 unary/binary operations. We don't call this function for loads.
2102 For reduction defs we call vect_get_constant_vectors(), since we are
2103 looking for initial loop invariant values. */
2104 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
2105 /* The defs are already vectorized. */
2106 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
2107 else
2108 /* Build vectors from scalar defs. */
2109 vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
2110 reduc_index);
2112 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
2113 /* Since we don't call this function with loads, this is a group of
2114 stores. */
2115 return;
2117 /* For reductions, we only need initial values. */
2118 if (reduc_index != -1)
2119 return;
2121 code = gimple_assign_rhs_code (first_stmt);
2122 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
2123 return;
2125 /* The number of vector defs is determined by the number of vector statements
2126 in the node from which we get those statements. */
2127 if (SLP_TREE_RIGHT (slp_node))
2128 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
2129 else
2130 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2132 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
2134 if (SLP_TREE_RIGHT (slp_node))
2135 /* The defs are already vectorized. */
2136 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
2137 else
2138 /* Build vectors from scalar defs. */
2139 vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
2140 -1);
2144 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2145 building a vector of type MASK_TYPE from it) and two input vectors placed in
2146 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2147 shifting by STRIDE elements of DR_CHAIN for every copy.
2148 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2149 copies).
2150 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2151 the created stmts must be inserted. */
2153 static inline void
2154 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2155 tree mask, int first_vec_indx, int second_vec_indx,
2156 gimple_stmt_iterator *gsi, slp_tree node,
2157 tree builtin_decl, tree vectype,
2158 VEC(tree,heap) *dr_chain,
2159 int ncopies, int vect_stmts_counter)
2161 tree perm_dest;
2162 gimple perm_stmt = NULL;
2163 stmt_vec_info next_stmt_info;
2164 int i, stride;
2165 tree first_vec, second_vec, data_ref;
2167 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2169 /* Initialize the vect stmts of NODE to properly insert the generated
2170 stmts later. */
/* Pad the VEC_STMTS vector with NULLs so VEC_replace below can write at
   arbitrary positions.  */
2171 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2172 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2173 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2175 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2176 for (i = 0; i < ncopies; i++)
2178 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2179 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2181 /* Generate the permute statement. */
/* The target permute builtin takes the two input vectors and the mask
   and produces the shuffled vector.  */
2182 perm_stmt = gimple_build_call (builtin_decl,
2183 3, first_vec, second_vec, mask);
2184 data_ref = make_ssa_name (perm_dest, perm_stmt);
2185 gimple_call_set_lhs (perm_stmt, data_ref);
2186 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2188 /* Store the vector statement in NODE. */
2189 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2190 stride * i + vect_stmts_counter, perm_stmt);
/* Advance to the input vectors of the next copy.  */
2192 first_vec_indx += stride;
2193 second_vec_indx += stride;
2196 /* Mark the scalar stmt as vectorized. */
/* Record the last generated permute as the vectorized counterpart of
   NEXT_SCALAR_STMT.  */
2197 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2198 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2202 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2203 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2204 representation. Check that the mask is valid and return FALSE if not.
2205 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2206 the next vector, i.e., the current first vector is not needed. */
2208 static bool
2209 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2210 int mask_nunits, bool only_one_vec, int index,
2211 int *mask, int *current_mask_element,
2212 bool *need_next_vector, int *number_of_mask_fixes,
2213 bool *mask_fixed, bool *needs_first_vector)
2215 int i;
2217 /* Convert to target specific representation. */
2218 *current_mask_element = first_mask_element + m;
2219 /* Adjust the value in case it's a mask for second and third vectors. */
2220 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2222 if (*current_mask_element < mask_nunits)
2223 *needs_first_vector = true;
2225 /* We have only one input vector to permute but the mask accesses values in
2226 the next vector as well. */
2227 if (only_one_vec && *current_mask_element >= mask_nunits)
2229 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "permutation requires at least two vectors ");
2232 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2235 return false;
2238 /* The mask requires the next vector. */
2239 if (*current_mask_element >= mask_nunits * 2)
2241 if (*needs_first_vector || *mask_fixed)
2243 /* We either need the first vector too or have already moved to the
2244 next vector. In both cases, this permutation needs three
2245 vectors. */
2246 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "permutation requires at "
2249 "least three vectors ");
2250 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2253 return false;
2256 /* We move to the next vector, dropping the first one and working with
2257 the second and the third - we need to adjust the values of the mask
2258 accordingly. */
2259 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2261 for (i = 0; i < index; i++)
2262 mask[i] -= mask_nunits * *number_of_mask_fixes;
2264 (*number_of_mask_fixes)++;
2265 *mask_fixed = true;
2268 *need_next_vector = *mask_fixed;
2270 /* This was the last element of this mask. Start a new one. */
2271 if (index == mask_nunits - 1)
2273 *number_of_mask_fixes = 1;
2274 *mask_fixed = false;
2275 *needs_first_vector = false;
2278 return true;
2282 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2283 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2284 permute statements for SLP_NODE_INSTANCE. */
2285 bool
2286 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2287 gimple_stmt_iterator *gsi, int vf,
2288 slp_instance slp_node_instance, bool analyze_only)
2290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2291 tree mask_element_type = NULL_TREE, mask_type;
2292 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2293 slp_tree node;
2294 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2295 gimple next_scalar_stmt;
2296 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2297 int first_mask_element;
2298 int index, unroll_factor, *mask, current_mask_element, ncopies;
2299 bool only_one_vec = false, need_next_vector = false;
2300 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2301 int number_of_mask_fixes = 1;
2302 bool mask_fixed = false;
2303 bool needs_first_vector = false;
2305 if (!targetm.vectorize.builtin_vec_perm)
2307 if (vect_print_dump_info (REPORT_DETAILS))
2309 fprintf (vect_dump, "no builtin for vect permute for ");
2310 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2313 return false;
2316 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2317 &mask_element_type);
2318 if (!builtin_decl || !mask_element_type)
2320 if (vect_print_dump_info (REPORT_DETAILS))
2322 fprintf (vect_dump, "no builtin for vect permute for ");
2323 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2326 return false;
2329 mask_type = get_vectype_for_scalar_type (mask_element_type);
2330 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2331 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2332 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2333 scale = mask_nunits / nunits;
2334 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2336 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2337 unrolling factor. */
2338 orig_vec_stmts_num = group_size *
2339 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2340 if (orig_vec_stmts_num == 1)
2341 only_one_vec = true;
2343 /* Number of copies is determined by the final vectorization factor
2344 relatively to SLP_NODE_INSTANCE unrolling factor. */
2345 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2347 /* Generate permutation masks for every NODE. Number of masks for each NODE
2348 is equal to GROUP_SIZE.
2349 E.g., we have a group of three nodes with three loads from the same
2350 location in each node, and the vector size is 4. I.e., we have a
2351 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2352 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2353 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2356 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2357 scpecific type, e.g., in bytes for Altivec.
2358 The last mask is illegal since we assume two operands for permute
2359 operation, and the mask element values can't be outside that range.
2360 Hence, the last mask must be converted into {2,5,5,5}.
2361 For the first two permutations we need the first and the second input
2362 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2363 we need the second and the third vectors: {b1,c1,a2,b2} and
2364 {c2,a3,b3,c3}. */
2366 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2368 scalar_index = 0;
2369 index = 0;
2370 vect_stmts_counter = 0;
2371 vec_index = 0;
2372 first_vec_index = vec_index++;
2373 if (only_one_vec)
2374 second_vec_index = first_vec_index;
2375 else
2376 second_vec_index = vec_index++;
2378 for (j = 0; j < unroll_factor; j++)
2380 for (k = 0; k < group_size; k++)
2382 first_mask_element = (i + j * group_size) * scale;
2383 for (m = 0; m < scale; m++)
2385 if (!vect_get_mask_element (stmt, first_mask_element, m,
2386 mask_nunits, only_one_vec, index, mask,
2387 &current_mask_element, &need_next_vector,
2388 &number_of_mask_fixes, &mask_fixed,
2389 &needs_first_vector))
2390 return false;
2392 mask[index++] = current_mask_element;
2395 if (index == mask_nunits)
2397 tree mask_vec = NULL;
2399 while (--index >= 0)
2401 tree t = build_int_cst (mask_element_type, mask[index]);
2402 mask_vec = tree_cons (NULL, t, mask_vec);
2404 mask_vec = build_vector (mask_type, mask_vec);
2405 index = 0;
2407 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2408 mask_vec))
2410 if (vect_print_dump_info (REPORT_DETAILS))
2412 fprintf (vect_dump, "unsupported vect permute ");
2413 print_generic_expr (vect_dump, mask_vec, 0);
2415 free (mask);
2416 return false;
2419 if (!analyze_only)
2421 if (need_next_vector)
2423 first_vec_index = second_vec_index;
2424 second_vec_index = vec_index;
2427 next_scalar_stmt = VEC_index (gimple,
2428 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2430 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2431 mask_vec, first_vec_index, second_vec_index,
2432 gsi, node, builtin_decl, vectype, dr_chain,
2433 ncopies, vect_stmts_counter++);
2440 free (mask);
2441 return true;
2446 /* Vectorize SLP instance tree in postorder. */
2448 static bool
2449 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2450 unsigned int vectorization_factor)
2452 gimple stmt;
2453 bool strided_store, is_store;
2454 gimple_stmt_iterator si;
2455 stmt_vec_info stmt_info;
2456 unsigned int vec_stmts_size, nunits, group_size;
2457 tree vectype;
2458 int i;
2459 slp_tree loads_node;
2461 if (!node)
2462 return false;
2464 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2465 vectorization_factor);
2466 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2467 vectorization_factor);
2469 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2470 stmt_info = vinfo_for_stmt (stmt);
2472 /* VECTYPE is the type of the destination. */
2473 vectype = STMT_VINFO_VECTYPE (stmt_info);
2474 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2475 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2477 /* For each SLP instance calculate number of vector stmts to be created
2478 for the scalar stmts in each node of the SLP tree. Number of vector
2479 elements in one vector iteration is the number of scalar elements in
2480 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2481 size. */
2482 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2484 /* In case of load permutation we have to allocate vectorized statements for
2485 all the nodes that participate in that permutation. */
2486 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2488 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2490 if (!SLP_TREE_VEC_STMTS (loads_node))
2492 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2493 vec_stmts_size);
2494 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2499 if (!SLP_TREE_VEC_STMTS (node))
2501 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2502 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2505 if (vect_print_dump_info (REPORT_DETAILS))
2507 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2508 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2511 /* Loads should be inserted before the first load. */
2512 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2513 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2514 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2515 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2516 else
2517 si = gsi_for_stmt (stmt);
2519 /* Stores should be inserted just before the last store. */
2520 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2521 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2523 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2524 si = gsi_for_stmt (last_store);
2527 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2528 return is_store;
2532 /* Generate vector code for all SLP instances in the loop/basic block. */
2534 bool
2535 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2537 VEC (slp_instance, heap) *slp_instances;
2538 slp_instance instance;
2539 unsigned int i, vf;
2540 bool is_store = false;
2542 if (loop_vinfo)
2544 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2545 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2547 else
2549 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2550 vf = 1;
2553 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2555 /* Schedule the tree of INSTANCE. */
2556 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2557 instance, vf);
2558 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2559 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2560 fprintf (vect_dump, "vectorizing stmts using SLP.");
2563 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2565 slp_tree root = SLP_INSTANCE_TREE (instance);
2566 gimple store;
2567 unsigned int j;
2568 gimple_stmt_iterator gsi;
2570 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2571 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2573 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2574 break;
2576 /* Free the attached stmt_vec_info and remove the stmt. */
2577 gsi = gsi_for_stmt (store);
2578 gsi_remove (&gsi, true);
2579 free_stmt_vec_info (store);
2583 return is_store;
2587 /* Vectorize the basic block. */
2589 void
2590 vect_slp_transform_bb (basic_block bb)
2592 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2593 gimple_stmt_iterator si;
2595 gcc_assert (bb_vinfo);
2597 if (vect_print_dump_info (REPORT_DETAILS))
2598 fprintf (vect_dump, "SLPing BB\n");
2600 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2602 gimple stmt = gsi_stmt (si);
2603 stmt_vec_info stmt_info;
2605 if (vect_print_dump_info (REPORT_DETAILS))
2607 fprintf (vect_dump, "------>SLPing statement: ");
2608 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2611 stmt_info = vinfo_for_stmt (stmt);
2612 gcc_assert (stmt_info);
2614 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2615 if (STMT_SLP_TYPE (stmt_info))
2617 vect_schedule_slp (NULL, bb_vinfo);
2618 break;
2622 mark_sym_for_renaming (gimple_vop (cfun));
2623 /* The memory tags and pointers in vectorized statements need to
2624 have their SSA forms updated. FIXME, why can't this be delayed
2625 until all the loops have been transformed? */
2626 update_ssa (TODO_update_ssa);
2628 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2631 destroy_bb_vec_info (bb_vinfo);