gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"	/* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
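
/* This file implements SLP (superword level parallelism) vectorization:
   it gathers groups of isomorphic scalar statements (grouped stores,
   reduction chains or reduction groups) into SLP trees and replaces each
   node of such a tree by vector statements.  For example, the group of
   stores

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   roughly becomes the root of an SLP tree with a child node for the
   additions and two grandchildren for the loads from b[] and c[].  */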

/* Extract the location of the basic block in the source code.
   Return the location of a stmt in it on success and UNKNOWN_LOCATION
   if no stmt carries a location.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}

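/* Note that for a COND_EXPR right-hand side the embedded comparison
   contributes two scalar operands in addition to the then/else arms,
   so one extra child slot is reserved below (nops is incremented).  */
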
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

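/* For example, in a chain of accesses a[0], a[1], a[3] (GROUP_GAP of
   a[3] being 2), the place of a[3] returned below is 0 + 1 + 2 = 3,
   even though its index within the chain is 2.  */
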
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

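/* For a commutative operation the operands of a mismatching stmt are
   re-read in swapped order (see the 'again' label below); if that makes
   the defs match, the operands are physically swapped with
   swap_ssa_operands before returning.  E.g. in

     x0 = a0 + b0;
     x1 = b1 + a1;

   the second stmt matches the first one after such a swap.  */
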
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1; if the error could be corrected by swapping the operands of
   the operation return 1; if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
			     gimple stmt, bool first,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	{
	  first_op_cond = true;
	  commutative = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  if (i == 0 || i == 1)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
				  swapped ? !i : i);
	  else
	    oprnd = gimple_op (stmt, first_op_idx + i - 1);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt)
	  || (!def_stmt && dt != vect_constant_def))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't find def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	      || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
		  && gimple_code (def_stmt) != GIMPLE_PHI))
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern)
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	      def = gimple_phi_result (def_stmt);
	      break;

	    case GIMPLE_ASSIGN:
	      def = gimple_assign_lhs (def_stmt);
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	case vect_reduction_def:
	  break;

	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
			     &TREE_OPERAND (cond, 1));
	  TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}

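/* The isomorphism check below compares each stmt of the group against
   the first one: the operation codes must agree (modulo the documented
   REALPART/IMAGPART and grouped memory-reference exceptions), vector
   shifts with a scalar shift operand must use the same shift amount in
   all stmts, and calls must invoke the same function with the same
   types.  */
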
/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation, and are not of unsupported operation types.  Return true
   if so; otherwise return false and indicate in *MATCHES which stmts are
   not isomorphic to the first one.  If MATCHES[0] is false then this
   indicates that the comparison could not be carried out or the stmts
   will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		       vec<gimple> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == COND_EXPR
	  && (cond = gimple_assign_rhs1 (stmt))
	  && !COMPARISON_CLASS_P (cond))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: condition is not "
			       "comparison ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
	{
	  *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
	  if (bb_vinfo)
	    vectorization_factor = *max_nunits;
	}

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      unsigned unrolling_factor
		= least_common_multiple
		    (*max_nunits, group_size) / group_size;
	      /* FORNOW: Check that there is no gap between the loads
		 and no gap between the groups when we need to load
		 multiple groups at once.
		 ???  We should enhance this to only disallow gaps
		 inside vectors.  */
	      if ((unrolling_factor > 1
		   && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
			&& GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
		       /* If the group is split up then GROUP_GAP
			  isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
		       || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: grouped "
				       "loads have gaps ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      /* Check that the size of interleaved loads group is not
		 greater than the SLP group size.  */
	      unsigned ncopies
		= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
	      if (loop_vinfo
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
		       - GROUP_GAP (vinfo_for_stmt (stmt)))
		      > ncopies * group_size))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: the number "
				       "of interleaved loads is greater than "
				       "the SLP group size ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      old_first_load = first_load;
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;

	      /* In some cases a group of loads is just the same load
		 repeated N times.  Only analyze its cost once.  */
	      if (first_load == stmt && old_first_load != first_load)
		{
		  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
		  if (vect_supportable_dr_alignment (first_dr, false)
		      == dr_unaligned_unsupported)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: unsupported "
					   "unaligned load ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
			}
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		}
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && rhs_code != COND_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (first_cond_code != TREE_CODE (cond_expr))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

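/* MATCHES[i] records, for the caller, whether scalar stmt i of the group
   matched the first stmt; MATCHES[0] == false signals a fatal failure.
   NPERMUTES counts the operand-swapping retries performed so far so that
   the recursion can cut them off (see the cut-off check below).  */
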
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		     slp_tree *node, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     unsigned int vectorization_factor,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple stmt;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
			      SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
			      max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
					   stmt, (i == 0), &oprnds_info))
	{
	case 0:
	  break;
	case -1:
	  matches[0] = false;
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	case 1:
	  matches[i] = false;
	  break;
	}
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
	{
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}

      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
			       group_size, max_nunits, loads,
			       vectorization_factor, matches,
			       npermutes, &this_tree_size, max_tree_size))
	{
	  oprnd_info->def_stmts = vNULL;
	  SLP_TREE_CHILDREN (*node).quick_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (bb_vinfo
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  oprnd_info->def_stmts = vNULL;
	  vect_free_slp_tree (child);
	  SLP_TREE_CHILDREN (*node).quick_push (NULL);
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  /* Do so only if the number of not successful permutes was
	     no more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  unsigned int j;
	  slp_tree grandchild;

	  /* Roll back.  */
	  *max_nunits = old_max_nunits;
	  loads->truncate (old_nloads);
	  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
	    vect_free_slp_tree (grandchild);
	  SLP_TREE_CHILDREN (child).truncate (0);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		gimple tem = oprnds_info[0]->def_stmts[j];
		oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
		oprnds_info[1]->def_stmts[j] = tem;
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again ... */
	  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
				   group_size, max_nunits, loads,
				   vectorization_factor,
				   matches, npermutes, &this_tree_size,
				   max_tree_size))
	    {
	      oprnd_info->def_stmts = vNULL;
	      SLP_TREE_CHILDREN (*node).quick_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

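/* For example, for a load node whose scalar stmts read b[1], b[0], b[3],
   b[2] from the interleaving chain starting at b[0], the recorded load
   permutation is 1 0 3 2 (element j is the place in the chain of the
   j-th scalar stmt of the node).  */
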
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
	 that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
	return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
	{
	  if (!load->load_permutation.exists ())
	    return false;
	  FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	    if (lidx != node->load_permutation[j])
	      return false;
	}

      /* Check that the loads in the first sequence are different and there
	 are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
	{
	  if (bitmap_bit_p (load_index, lidx))
	    {
	      sbitmap_free (load_index);
	      return false;
	    }
	  bitmap_set_bit (load_index, lidx);
	}
      for (i = 0; i < group_size; i++)
	if (!bitmap_bit_p (load_index, i))
	  {
	    sbitmap_free (load_index);
	    return false;
	  }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
	 statements in the nodes is not important unless they are memory
	 accesses, we can rearrange the statements in all the nodes
	 according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
				node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
	 form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0 && next_load != load)
		return false;
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	}

      /* Check that the alignment of the first load in every subchain, i.e.,
	 the first statement in every load node, is supported.
	 ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  first_load = SLP_TREE_SCALAR_STMTS (node)[0];
	  if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
	    {
	      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
	      if (vect_supportable_dr_alignment (dr, false)
		  == dr_unaligned_unsupported)
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported unaligned load ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					first_load, 0);
		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  return false;
		}
	    }
	}

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
	{
	  sbitmap_free (load_index);
	  return false;
	}
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
	if (k != lidx)
	  {
	    sbitmap_free (load_index);
	    return false;
	  }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
	sbitmap_free (load_index);
	return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load
	      (node, vNULL, NULL,
	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}

/* Find the last scalar stmt in the SLP tree rooted at NODE.  */

static gimple
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple last = NULL, stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
			 slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (child)
      vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
			       instance, child, prologue_cost_vec,
			       ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost, false,
			       vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  int i;
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  vect_model_load_cost (stmt_info, ncopies_for_cost, false,
				node, prologue_cost_vec, body_cost_vec);
	  /* If the load is permuted record the cost for the permutation.
	     ???  Loads from multiple chains are let through here only
	     for a single special case involving complex numbers where
	     in the end no permutation is necessary.  */
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
	    if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
		 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
		&& vect_get_place_in_interleaving_chain
		     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
	      {
		record_stmt_cost (body_cost_vec, group_size, vec_perm,
				  stmt_info, 0, vect_body);
		break;
	      }
	}
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
		      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
			      &def_stmt, &def, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ???  We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }
}

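/* For instance, with NUNITS 4 and GROUP_SIZE 6 the least common multiple
   is 12, so NCOPIES_FOR_COST computed below is 12 / 4 = 3 vector stmts
   per SLP node.  */
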
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
			   instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
		: BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

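/* The unrolling factor computed below is the smallest multiple of the
   group size that fills whole vectors: e.g. GROUP_SIZE 2 with NUNITS 4
   gives least_common_multiple (4, 2) / 2 = 2, i.e. two copies of the
   group per vector, which is only allowed in loop SLP.  */
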
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
			   gimple stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
	{
	  scalar_type = TREE_TYPE (DR_REF (dr));
	  vectype = get_vectype_for_scalar_type (scalar_type);
	}
      else
	{
	  gcc_assert (loop_vinfo);
	  vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
	}

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required in basic"
			 " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
			   &max_nunits, &loads,
			   vectorization_factor, matches, &npermutes, NULL,
			   max_tree_size))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
	unrolling_factor = least_common_multiple (max_nunits, group_size)
			   / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unrolling required in basic"
			     " block SLP\n");
	  vect_free_slp_tree (node);
	  loads.release ();
	  return false;
	}

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
	{
	  vec<unsigned> load_permutation;
	  int j;
	  gimple load, first_stmt;
	  bool this_load_permuted = false;
	  load_permutation.create (group_size);
	  first_stmt = GROUP_FIRST_ELEMENT
	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
	    {
	      int load_place
		= vect_get_place_in_interleaving_chain (load, first_stmt);
	      gcc_assert (load_place != -1);
	      if (load_place != j)
		this_load_permuted = true;
	      load_permutation.safe_push (load_place);
	    }
	  if (!this_load_permuted)
	    {
	      load_permutation.release ();
	      continue;
	    }
	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
	  loads_permuted = true;
	}

      if (loads_permuted)
	{
	  if (!vect_supported_load_permutation_p (new_instance))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported load "
				   "permutation ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      vect_free_slp_instance (new_instance);
	      return false;
	    }
	}

      if (loop_vinfo)
	{
	  /* Compute the costs of this SLP instance.  Delay this for BB
	     vectorization as we don't have vector types computed yet.  */
	  vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
				 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
	  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
	}
      else
	BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
	vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		  unsigned max_tree_size)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
				   max_tree_size))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
	if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
				       max_tree_size))
	  ok = true;
	else
	  return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
	 detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
				    max_tree_size))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
		     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Decided to SLP %d instances. Unrolling factor %d\n",
		     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

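/* A stmt is hybrid when its scalar result is also used by a stmt that is
   not part of any SLP instance; such stmts must be vectorized both as
   part of the SLP instance and by the loop-based scheme.  */
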
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
	  if (gimple_bb (use_stmt)
	      && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
	      && (use_vinfo = vinfo_for_stmt (use_stmt))
	      && !STMT_SLP_TYPE (use_vinfo)
	      && (STMT_VINFO_RELEVANT (use_vinfo)
		  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))
		  || (STMT_VINFO_IN_PATTERN_P (use_vinfo)
		      && STMT_VINFO_RELATED_STMT (use_vinfo)
		      && !STMT_SLP_TYPE (vinfo_for_stmt
			    (STMT_VINFO_RELATED_STMT (use_vinfo)))))
	      && !(gimple_code (use_stmt) == GIMPLE_PHI
		   && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
	    stype = hybrid;
    }

  if (stype == hybrid)
    STMT_SLP_TYPE (stmt_vinfo) = hybrid;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (child)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}

/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  struct loop *loopp = (struct loop *) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
	  && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
	STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
			  walk_stmt_info *)
{
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
    *handled = true;
  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
		     "\n");

  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
				      i, pure_slp);
    }
}

2002 /* Create and initialize a new bb_vec_info struct for BB, as well as
2003 stmt_vec_info structs for all the stmts in it. */
2005 static bb_vec_info
2006 new_bb_vec_info (basic_block bb)
2008 bb_vec_info res = NULL;
2009 gimple_stmt_iterator gsi;
2011 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2012 BB_VINFO_BB (res) = bb;
2014 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2016 gimple stmt = gsi_stmt (gsi);
2017 gimple_set_uid (stmt, 0);
2018 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2021 BB_VINFO_GROUPED_STORES (res).create (10);
2022 BB_VINFO_SLP_INSTANCES (res).create (2);
2023 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2025 bb->aux = res;
2026 return res;
2030 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2031 stmts in the basic block. */
2033 static void
2034 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2036 vec<slp_instance> slp_instances;
2037 slp_instance instance;
2038 basic_block bb;
2039 gimple_stmt_iterator si;
2040 unsigned i;
2042 if (!bb_vinfo)
2043 return;
2045 bb = BB_VINFO_BB (bb_vinfo);
2047 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2049 gimple stmt = gsi_stmt (si);
2050 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2052 if (stmt_info)
2053 /* Free stmt_vec_info. */
2054 free_stmt_vec_info (stmt);
2057 vect_destroy_datarefs (NULL, bb_vinfo);
2058 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2059 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2060 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2061 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2062 vect_free_slp_instance (instance);
2063 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2064 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2065 free (bb_vinfo);
2066 bb->aux = NULL;
2070 /* Analyze statements contained in SLP tree node after recursively analyzing
2071 the subtree. Return TRUE if the operations are supported. */
2073 static bool
2074 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
2076 bool dummy;
2077 int i;
2078 gimple stmt;
2079 slp_tree child;
2081 if (!node)
2082 return true;
2084 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2085 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
2086 return false;
2088 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2090 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2091 gcc_assert (stmt_info);
2092 gcc_assert (PURE_SLP_STMT (stmt_info));
2094 if (!vect_analyze_stmt (stmt, &dummy, node))
2095 return false;
2098 return true;
2102 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2103 operations are supported. */
2105 static bool
2106 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2108 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2109 slp_instance instance;
2110 int i;
2112 for (i = 0; slp_instances.iterate (i, &instance); )
2114 if (!vect_slp_analyze_node_operations (bb_vinfo,
2115 SLP_INSTANCE_TREE (instance)))
2117 vect_free_slp_instance (instance);
2118 slp_instances.ordered_remove (i);
2120 else
2121 i++;
2124 if (!slp_instances.length ())
2125 return false;
2127 return true;
2131 /* Compute the scalar cost of the SLP node NODE and its children
2132 and return it. Do not account defs that are marked in LIFE and
2133 update LIFE according to uses of NODE. */
2135 static unsigned
2136 vect_bb_slp_scalar_cost (basic_block bb,
2137 slp_tree node, vec<bool, va_heap> *life)
2139 unsigned scalar_cost = 0;
2140 unsigned i;
2141 gimple stmt;
2142 slp_tree child;
2144 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2146 unsigned stmt_cost;
2147 ssa_op_iter op_iter;
2148 def_operand_p def_p;
2149 stmt_vec_info stmt_info;
2151 if ((*life)[i])
2152 continue;
2154 /* If there is a non-vectorized use of the defs then the scalar
2155 stmt is kept live, in which case we do not account it, or any
2156 required defs in the SLP children, in the scalar cost. This
2157 way we make the vectorization more costly when compared to
2158 the scalar cost. */
2159 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2161 imm_use_iterator use_iter;
2162 gimple use_stmt;
2163 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2164 if (!is_gimple_debug (use_stmt)
2165 && (gimple_code (use_stmt) == GIMPLE_PHI
2166 || gimple_bb (use_stmt) != bb
2167 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2169 (*life)[i] = true;
2170 BREAK_FROM_IMM_USE_STMT (use_iter);
2173 if ((*life)[i])
2174 continue;
2176 stmt_info = vinfo_for_stmt (stmt);
2177 if (STMT_VINFO_DATA_REF (stmt_info))
2179 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2180 stmt_cost = vect_get_stmt_cost (scalar_load);
2181 else
2182 stmt_cost = vect_get_stmt_cost (scalar_store);
2184 else
2185 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2187 scalar_cost += stmt_cost;
2190 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2191 if (child)
2192 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2194 return scalar_cost;
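/* An illustration of the LIFE logic above (a hypothetical block, not
   from the sources):

     t = x + y;
     a[0] = t;
     a[1] = z;

   If T has a use in another basic block, the scalar stmt computing it
   stays live even when the stores are vectorized, so its cost is not
   counted as saved.  That makes the accumulated scalar cost smaller and
   therefore biases the profitability check against vectorization, as
   the comment above intends.  */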
2197 /* Check if vectorization of the basic block is profitable. */
2199 static bool
2200 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2202 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2203 slp_instance instance;
2204 int i, j;
2205 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2206 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2207 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2208 stmt_vec_info stmt_info = NULL;
2209 stmt_vector_for_cost body_cost_vec;
2210 stmt_info_for_cost *ci;
2212 /* Calculate vector costs. */
2213 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2215 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2217 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2219 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2220 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2221 stmt_info, ci->misalign, vect_body);
2225 /* Calculate scalar cost. */
2226 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2228 auto_vec<bool, 20> life;
2229 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2230 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2231 SLP_INSTANCE_TREE (instance),
2232 &life);
2235 /* Complete the target-specific cost calculation. */
2236 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2237 &vec_inside_cost, &vec_epilogue_cost);
2239 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2241 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2244 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2245 vec_inside_cost);
2246 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2247 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2248 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2251 /* Vectorization is profitable if its cost is less than the cost of the
2252 scalar version. */
2253 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2254 return false;
2256 return true;
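/* A worked example of the final comparison with made-up numbers: for
   vec_prologue_cost == 1, vec_epilogue_cost == 0 and vec_inside_cost == 4
   we get vec_outside_cost == 1, so the block is considered profitable
   only if scalar_cost > 5; a scalar cost of exactly 5 fails the check
   above, since equal cost is no gain.  */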
2259 /* Check if the basic block can be vectorized. */
2261 static bb_vec_info
2262 vect_slp_analyze_bb_1 (basic_block bb)
2264 bb_vec_info bb_vinfo;
2265 vec<slp_instance> slp_instances;
2266 slp_instance instance;
2267 int i;
2268 int min_vf = 2;
2269 unsigned n_stmts = 0;
2271 bb_vinfo = new_bb_vec_info (bb);
2272 if (!bb_vinfo)
2273 return NULL;
2275 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2279 "not vectorized: unhandled data-ref in basic "
2280 "block.\n");
2282 destroy_bb_vec_info (bb_vinfo);
2283 return NULL;
2286 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2288 if (dump_enabled_p ())
2289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2290 "not vectorized: not enough data-refs in "
2291 "basic block.\n");
2293 destroy_bb_vec_info (bb_vinfo);
2294 return NULL;
2297 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2299 if (dump_enabled_p ())
2300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2301 "not vectorized: unhandled data access in "
2302 "basic block.\n");
2304 destroy_bb_vec_info (bb_vinfo);
2305 return NULL;
2308 vect_pattern_recog (NULL, bb_vinfo);
2310 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2312 if (dump_enabled_p ())
2313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2314 "not vectorized: bad data alignment in basic "
2315 "block.\n");
2317 destroy_bb_vec_info (bb_vinfo);
2318 return NULL;
2321 /* Check the SLP opportunities in the basic block, analyze and build SLP
2322 trees. */
2323 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2327 "not vectorized: failed to find SLP opportunities "
2328 "in basic block.\n");
2330 destroy_bb_vec_info (bb_vinfo);
2331 return NULL;
2334 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2336 /* Mark all the statements that we want to vectorize as pure SLP and
2337 relevant. */
2338 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2340 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2341 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2344 /* Mark all the statements that we do not want to vectorize. */
2345 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2346 !gsi_end_p (gsi); gsi_next (&gsi))
2348 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2349 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2350 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2353 /* Analyze dependences. At this point all stmts not participating in
2354 vectorization have to be marked. Dependence analysis assumes
2355 that we either vectorize all SLP instances or none at all. */
2356 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "not vectorized: unhandled data dependence "
2361 "in basic block.\n");
2363 destroy_bb_vec_info (bb_vinfo);
2364 return NULL;
2367 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2369 if (dump_enabled_p ())
2370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2371 "not vectorized: unsupported alignment in basic "
2372 "block.\n");
2373 destroy_bb_vec_info (bb_vinfo);
2374 return NULL;
2377 if (!vect_slp_analyze_operations (bb_vinfo))
2379 if (dump_enabled_p ())
2380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2381 "not vectorized: bad operation in basic block.\n");
2383 destroy_bb_vec_info (bb_vinfo);
2384 return NULL;
2387 /* Compute the costs of the SLP instances. */
2388 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2390 gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
2391 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2392 vect_analyze_slp_cost (NULL, bb_vinfo,
2393 instance, TYPE_VECTOR_SUBPARTS (vectype));
2396 /* Cost model: check if the vectorization is worthwhile. */
2397 if (!unlimited_cost_model (NULL)
2398 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2402 "not vectorized: vectorization is not "
2403 "profitable.\n");
2405 destroy_bb_vec_info (bb_vinfo);
2406 return NULL;
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_NOTE, vect_location,
2411 "Basic block will be vectorized using SLP\n");
2413 return bb_vinfo;
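/* A sketch of the kind of block this analysis is aimed at (hypothetical
   source; OUT and IN are made-up names):

     out[0] = in[0] + 1;
     out[1] = in[1] + 1;
     out[2] = in[2] + 1;
     out[3] = in[3] + 1;

   The four adjacent stores form the grouped data-refs that seed
   vect_analyze_slp; with a 4-element vector type the block reduces to
   one vector load, one vector add and one vector store, provided the
   alignment, dependence and cost checks above all pass.  */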
2417 bb_vec_info
2418 vect_slp_analyze_bb (basic_block bb)
2420 bb_vec_info bb_vinfo;
2421 int insns = 0;
2422 gimple_stmt_iterator gsi;
2423 unsigned int vector_sizes;
2425 if (dump_enabled_p ())
2426 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2428 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2430 gimple stmt = gsi_stmt (gsi);
2431 if (!is_gimple_debug (stmt)
2432 && !gimple_nop_p (stmt)
2433 && gimple_code (stmt) != GIMPLE_LABEL)
2434 insns++;
2437 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "not vectorized: too many instructions in "
2442 "basic block.\n");
2444 return NULL;
2447 /* Autodetect the first vector size we try. */
2448 current_vector_size = 0;
2449 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2451 while (1)
2453 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2454 if (bb_vinfo)
2455 return bb_vinfo;
2457 destroy_bb_vec_info (bb_vinfo);
2459 vector_sizes &= ~current_vector_size;
2460 if (vector_sizes == 0
2461 || current_vector_size == 0)
2462 return NULL;
2464 /* Try the next biggest vector size. */
2465 current_vector_size = 1 << floor_log2 (vector_sizes);
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_NOTE, vect_location,
2468 "***** Re-trying analysis with "
2469 "vector size %d\n", current_vector_size);
2474 /* SLP costs are calculated according to the SLP instance unrolling factor (i.e.,
2475 the number of created vector stmts depends on the unrolling factor).
2476 However, the actual number of vector stmts for every SLP node depends on
2477 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2478 should be updated. In this function we assume that the inside costs
2479 calculated in vect_model_xxx_cost are linear in ncopies. */
2481 void
2482 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2484 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2485 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2486 slp_instance instance;
2487 stmt_vector_for_cost body_cost_vec;
2488 stmt_info_for_cost *si;
2489 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_NOTE, vect_location,
2493 "=== vect_update_slp_costs_according_to_vf ===\n");
2495 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2497 /* We assume that costs are linear in ncopies. */
2498 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2500 /* Record the instance's instructions in the target cost model.
2501 This was delayed until here because the count of instructions
2502 isn't known beforehand. */
2503 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2505 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2506 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2507 vinfo_for_stmt (si->stmt), si->misalign,
2508 vect_body);
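/* A worked example with made-up numbers: for VF == 8 and an instance
   with SLP_INSTANCE_UNROLLING_FACTOR == 2, ncopies == 8 / 2 == 4, so a
   body-cost entry recorded with count 3 is entered into the target cost
   model as 3 * 4 == 12 statements of its kind.  */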
2513 /* For constant and loop invariant defs of SLP_NODE this function returns
2514 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2515 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2516 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2517 REDUC_INDEX is the index of the reduction operand in the statements, unless
2518 it is -1. */
2520 static void
2521 vect_get_constant_vectors (tree op, slp_tree slp_node,
2522 vec<tree> *vec_oprnds,
2523 unsigned int op_num, unsigned int number_of_vectors,
2524 int reduc_index)
2526 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2527 gimple stmt = stmts[0];
2528 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2529 unsigned nunits;
2530 tree vec_cst;
2531 tree *elts;
2532 unsigned j, number_of_places_left_in_vector;
2533 tree vector_type;
2534 tree vop;
2535 int group_size = stmts.length ();
2536 unsigned int vec_num, i;
2537 unsigned number_of_copies = 1;
2538 vec<tree> voprnds;
2539 voprnds.create (number_of_vectors);
2540 bool constant_p, is_store;
2541 tree neutral_op = NULL;
2542 enum tree_code code = gimple_expr_code (stmt);
2543 gimple def_stmt;
2544 struct loop *loop;
2545 gimple_seq ctor_seq = NULL;
2547 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2548 && reduc_index != -1)
2550 op_num = reduc_index - 1;
2551 op = gimple_op (stmt, reduc_index);
2552 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2553 we need either neutral operands or the original operands. See
2554 get_initial_def_for_reduction() for details. */
2555 switch (code)
2557 case WIDEN_SUM_EXPR:
2558 case DOT_PROD_EXPR:
2559 case PLUS_EXPR:
2560 case MINUS_EXPR:
2561 case BIT_IOR_EXPR:
2562 case BIT_XOR_EXPR:
2563 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2564 neutral_op = build_real (TREE_TYPE (op), dconst0);
2565 else
2566 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2568 break;
2570 case MULT_EXPR:
2571 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2572 neutral_op = build_real (TREE_TYPE (op), dconst1);
2573 else
2574 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2576 break;
2578 case BIT_AND_EXPR:
2579 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2580 break;
2582 /* For MIN/MAX we don't have an easy neutral operand but
2583 the initial values can be used fine here. Only for
2584 a reduction chain do we have to force a neutral element. */
2585 case MAX_EXPR:
2586 case MIN_EXPR:
2587 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2588 neutral_op = NULL;
2589 else
2591 def_stmt = SSA_NAME_DEF_STMT (op);
2592 loop = (gimple_bb (stmt))->loop_father;
2593 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2594 loop_preheader_edge (loop));
2596 break;
2598 default:
2599 neutral_op = NULL;
2603 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2605 is_store = true;
2606 op = gimple_assign_rhs1 (stmt);
2608 else
2609 is_store = false;
2611 gcc_assert (op);
2613 if (CONSTANT_CLASS_P (op))
2614 constant_p = true;
2615 else
2616 constant_p = false;
2618 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2619 gcc_assert (vector_type);
2620 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2622 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2623 created vectors. It is greater than 1 if unrolling is performed.
2625 For example, we have two scalar operands, s1 and s2 (e.g., group of
2626 strided accesses of size two), while NUNITS is four (i.e., four scalars
2627 of this type can be packed in a vector). The output vector will contain
2628 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2629 will be 2).
2631 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2632 containing the operands.
2634 For example, NUNITS is four as before, and the group size is 8
2635 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2636 {s5, s6, s7, s8}. */
2638 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2640 number_of_places_left_in_vector = nunits;
2641 elts = XALLOCAVEC (tree, nunits);
2642 bool place_after_defs = false;
2643 for (j = 0; j < number_of_copies; j++)
2645 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2647 if (is_store)
2648 op = gimple_assign_rhs1 (stmt);
2649 else
2651 switch (code)
2653 case COND_EXPR:
2654 if (op_num == 0 || op_num == 1)
2656 tree cond = gimple_assign_rhs1 (stmt);
2657 op = TREE_OPERAND (cond, op_num);
2659 else
2661 if (op_num == 2)
2662 op = gimple_assign_rhs2 (stmt);
2663 else
2664 op = gimple_assign_rhs3 (stmt);
2666 break;
2668 case CALL_EXPR:
2669 op = gimple_call_arg (stmt, op_num);
2670 break;
2672 case LSHIFT_EXPR:
2673 case RSHIFT_EXPR:
2674 case LROTATE_EXPR:
2675 case RROTATE_EXPR:
2676 op = gimple_op (stmt, op_num + 1);
2677 /* Unlike the other binary operators, shifts/rotates take
2678 an int shift count instead of one of the same type as
2679 the lhs, so make sure the scalar is the right type if
2680 we are dealing with vectors of
2681 long long/long/short/char. */
2682 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2683 op = fold_convert (TREE_TYPE (vector_type), op);
2684 break;
2686 default:
2687 op = gimple_op (stmt, op_num + 1);
2688 break;
2692 if (reduc_index != -1)
2694 loop = (gimple_bb (stmt))->loop_father;
2695 def_stmt = SSA_NAME_DEF_STMT (op);
2697 gcc_assert (loop);
2699 /* Get the def before the loop. In a reduction chain we have
2700 only one initial value. */
2701 if ((j != (number_of_copies - 1)
2702 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2703 && i != 0))
2704 && neutral_op)
2705 op = neutral_op;
2706 else
2707 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2708 loop_preheader_edge (loop));
2711 /* Create 'vect_ = {op0,op1,...,opn}'. */
2712 number_of_places_left_in_vector--;
2713 tree orig_op = op;
2714 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2716 if (CONSTANT_CLASS_P (op))
2718 op = fold_unary (VIEW_CONVERT_EXPR,
2719 TREE_TYPE (vector_type), op);
2720 gcc_assert (op && CONSTANT_CLASS_P (op));
2722 else
2724 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2725 gimple init_stmt;
2726 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2727 init_stmt
2728 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2729 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2730 op = new_temp;
2733 elts[number_of_places_left_in_vector] = op;
2734 if (!CONSTANT_CLASS_P (op))
2735 constant_p = false;
2736 if (TREE_CODE (orig_op) == SSA_NAME
2737 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2738 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2739 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2740 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2741 place_after_defs = true;
2743 if (number_of_places_left_in_vector == 0)
2745 number_of_places_left_in_vector = nunits;
2747 if (constant_p)
2748 vec_cst = build_vector (vector_type, elts);
2749 else
2751 vec<constructor_elt, va_gc> *v;
2752 unsigned k;
2753 vec_alloc (v, nunits);
2754 for (k = 0; k < nunits; ++k)
2755 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2756 vec_cst = build_constructor (vector_type, v);
2758 tree init;
2759 gimple_stmt_iterator gsi;
2760 if (place_after_defs)
2762 gsi = gsi_for_stmt
2763 (vect_find_last_scalar_stmt_in_slp (slp_node));
2764 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2766 else
2767 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2768 if (ctor_seq != NULL)
2770 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2771 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2772 GSI_SAME_STMT);
2773 ctor_seq = NULL;
2775 voprnds.quick_push (init);
2776 place_after_defs = false;
2781 /* Since the vectors are created in the reverse order, we should invert
2782 them. */
2783 vec_num = voprnds.length ();
2784 for (j = vec_num; j != 0; j--)
2786 vop = voprnds[j - 1];
2787 vec_oprnds->quick_push (vop);
2790 voprnds.release ();
2792 /* If VF is greater than the unrolling factor needed for the SLP
2793 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2794 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2795 to replicate the vectors. */
2796 while (number_of_vectors > vec_oprnds->length ())
2798 tree neutral_vec = NULL;
2800 if (neutral_op)
2802 if (!neutral_vec)
2803 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2805 vec_oprnds->quick_push (neutral_vec);
2807 else
2809 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2810 vec_oprnds->quick_push (vop);
2816 /* Get vectorized definitions from SLP_NODE that contains corresponding
2817 vectorized def-stmts. */
2819 static void
2820 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2822 tree vec_oprnd;
2823 gimple vec_def_stmt;
2824 unsigned int i;
2826 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2828 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2830 gcc_assert (vec_def_stmt);
2831 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2832 vec_oprnds->quick_push (vec_oprnd);
2837 /* Get vectorized definitions for SLP_NODE.
2838 If the scalar definitions are loop invariants or constants, collect them and
2839 call vect_get_constant_vectors () to create vector stmts.
2840 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2841 must be stored in the corresponding child of SLP_NODE, and we call
2842 vect_get_slp_vect_defs () to retrieve them. */
2844 void
2845 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2846 vec<vec<tree> > *vec_oprnds, int reduc_index)
2848 gimple first_stmt;
2849 int number_of_vects = 0, i;
2850 unsigned int child_index = 0;
2851 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2852 slp_tree child = NULL;
2853 vec<tree> vec_defs;
2854 tree oprnd;
2855 bool vectorized_defs;
2857 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2858 FOR_EACH_VEC_ELT (ops, i, oprnd)
2860 /* For each operand we check if it has vectorized definitions in a child
2861 node or we need to create them (for invariants and constants). We
2862 check if the LHS of the first stmt of the next child matches OPRND.
2863 If it does, we found the correct child. Otherwise, we call
2864 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2865 this child node is checked again for the next operand. */
2866 vectorized_defs = false;
2867 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2869 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2871 /* We have to check both pattern and original def, if available. */
2872 if (child)
2874 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2875 gimple related
2876 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2878 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2879 || (related
2880 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2882 /* The number of vector defs is determined by the number of
2883 vector statements in the node from which we get those
2884 statements. */
2885 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2886 vectorized_defs = true;
2887 child_index++;
2890 else
2891 child_index++;
2894 if (!vectorized_defs)
2896 if (i == 0)
2898 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2899 /* Number of vector stmts was calculated according to LHS in
2900 vect_schedule_slp_instance (), fix it by replacing LHS with
2901 RHS, if necessary. See vect_get_smallest_scalar_type () for
2902 details. */
2903 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2904 &rhs_size_unit);
2905 if (rhs_size_unit != lhs_size_unit)
2907 number_of_vects *= rhs_size_unit;
2908 number_of_vects /= lhs_size_unit;
2913 /* Allocate memory for vectorized defs. */
2914 vec_defs = vNULL;
2915 vec_defs.create (number_of_vects);
2917 /* For reduction defs we call vect_get_constant_vectors (), since we are
2918 looking for initial loop invariant values. */
2919 if (vectorized_defs && reduc_index == -1)
2920 /* The defs are already vectorized. */
2921 vect_get_slp_vect_defs (child, &vec_defs);
2922 else
2923 /* Build vectors from scalar defs. */
2924 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2925 number_of_vects, reduc_index);
2927 vec_oprnds->quick_push (vec_defs);
2929 /* For reductions, we only need initial values. */
2930 if (reduc_index != -1)
2931 return;
2936 /* Create NCOPIES permutation statements using the vector mask MASK (built
2937 by the caller as a VECTOR_CST of type MASK_TYPE) and two input vectors placed in
2938 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2939 shifting by STRIDE elements of DR_CHAIN for every copy.
2940 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2941 copies).
2942 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2943 the created stmts must be inserted. */
2945 static inline void
2946 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2947 tree mask, int first_vec_indx, int second_vec_indx,
2948 gimple_stmt_iterator *gsi, slp_tree node,
2949 tree vectype, vec<tree> dr_chain,
2950 int ncopies, int vect_stmts_counter)
2952 tree perm_dest;
2953 gimple perm_stmt = NULL;
2954 stmt_vec_info next_stmt_info;
2955 int i, stride;
2956 tree first_vec, second_vec, data_ref;
2958 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2960 /* Initialize the vect stmts of NODE to properly insert the generated
2961 stmts later. */
2962 for (i = SLP_TREE_VEC_STMTS (node).length ();
2963 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2964 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2966 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2967 for (i = 0; i < ncopies; i++)
2969 first_vec = dr_chain[first_vec_indx];
2970 second_vec = dr_chain[second_vec_indx];
2972 /* Generate the permute statement. */
2973 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
2974 first_vec, second_vec, mask);
2975 data_ref = make_ssa_name (perm_dest, perm_stmt);
2976 gimple_set_lhs (perm_stmt, data_ref);
2977 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2979 /* Store the vector statement in NODE. */
2980 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2982 first_vec_indx += stride;
2983 second_vec_indx += stride;
2986 /* Mark the scalar stmt as vectorized. */
2987 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2988 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
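/* A sketch of one generated copy (hypothetical SSA names and a
   4-element mask):

     vect_perm_dest.42 = VEC_PERM_EXPR <first_vec, second_vec, {0, 4, 1, 5}>;

   Indices 0..3 select from the first operand and 4..7 from the second,
   so this particular mask interleaves the low halves of the two inputs.
   Each of the NCOPIES copies applies the same mask to inputs taken
   STRIDE slots further along DR_CHAIN.  */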
2992 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2993 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
2994 representation. Check that the mask is valid and return FALSE if not.
2995 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2996 the next vector, i.e., the current first vector is not needed. */
2998 static bool
2999 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
3000 int mask_nunits, bool only_one_vec, int index,
3001 unsigned char *mask, int *current_mask_element,
3002 bool *need_next_vector, int *number_of_mask_fixes,
3003 bool *mask_fixed, bool *needs_first_vector)
3005 int i;
3007 /* Convert to target-specific representation. */
3008 *current_mask_element = first_mask_element + m;
3009 /* Adjust the value in case it's a mask for second and third vectors. */
3010 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3012 if (*current_mask_element < 0)
3014 if (dump_enabled_p ())
3016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3017 "permutation requires past vector ");
3018 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3019 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3021 return false;
3024 if (*current_mask_element < mask_nunits)
3025 *needs_first_vector = true;
3027 /* We have only one input vector to permute but the mask accesses values in
3028 the next vector as well. */
3029 if (only_one_vec && *current_mask_element >= mask_nunits)
3031 if (dump_enabled_p ())
3033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3034 "permutation requires at least two vectors ");
3035 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3036 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3039 return false;
3042 /* The mask requires the next vector. */
3043 while (*current_mask_element >= mask_nunits * 2)
3045 if (*needs_first_vector || *mask_fixed)
3047 /* We either need the first vector too or have already moved to the
3048 next vector. In both cases, this permutation needs three
3049 vectors. */
3050 if (dump_enabled_p ())
3052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3053 "permutation requires at "
3054 "least three vectors ");
3055 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3056 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3059 return false;
3062 /* We move to the next vector, dropping the first one and working with
3063 the second and the third - we need to adjust the values of the mask
3064 accordingly. */
3065 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3067 for (i = 0; i < index; i++)
3068 mask[i] -= mask_nunits * *number_of_mask_fixes;
3070 (*number_of_mask_fixes)++;
3071 *mask_fixed = true;
3074 *need_next_vector = *mask_fixed;
3076 /* This was the last element of this mask. Start a new one. */
3077 if (index == mask_nunits - 1)
3079 *number_of_mask_fixes = 1;
3080 *mask_fixed = false;
3081 *needs_first_vector = false;
3084 return true;
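/* A worked example with made-up numbers: for mask_nunits == 4, a
   requested element 9 is >= 2 * 4, so the window shifts to the next
   vector pair: 9 - 4 * 1 == 5 after one fix, and earlier mask entries
   are lowered by 4 as well.  This is exactly the {6,9,9,9} -> {2,5,5,5}
   conversion described in the comment inside
   vect_transform_slp_perm_load below.  */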
3088 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3089 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3090 permute statements for the SLP node NODE of the SLP instance
3091 SLP_NODE_INSTANCE. */
3093 bool
3094 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3095 gimple_stmt_iterator *gsi, int vf,
3096 slp_instance slp_node_instance, bool analyze_only)
3098 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3099 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3100 tree mask_element_type = NULL_TREE, mask_type;
3101 int i, j, k, nunits, vec_index = 0, scalar_index;
3102 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3103 gimple next_scalar_stmt;
3104 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3105 int first_mask_element;
3106 int index, unroll_factor, current_mask_element, ncopies;
3107 unsigned char *mask;
3108 bool only_one_vec = false, need_next_vector = false;
3109 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3110 int number_of_mask_fixes = 1;
3111 bool mask_fixed = false;
3112 bool needs_first_vector = false;
3113 machine_mode mode;
3115 mode = TYPE_MODE (vectype);
3117 if (!can_vec_perm_p (mode, false, NULL))
3119 if (dump_enabled_p ())
3121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3122 "no vect permute for ");
3123 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3124 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3126 return false;
3129 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3130 same size as the vector element being permuted. */
3131 mask_element_type = lang_hooks.types.type_for_mode
3132 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3133 mask_type = get_vectype_for_scalar_type (mask_element_type);
3134 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3135 mask = XALLOCAVEC (unsigned char, nunits);
3136 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3138 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3139 unrolling factor. */
3140 orig_vec_stmts_num = group_size *
3141 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3142 if (orig_vec_stmts_num == 1)
3143 only_one_vec = true;
3145 /* The number of copies is determined by the final vectorization factor
3146 relative to the SLP_NODE_INSTANCE unrolling factor. */
3147 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3149 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3150 return false;
3152 /* Generate permutation masks for every NODE. Number of masks for each NODE
3153 is equal to GROUP_SIZE.
3154 E.g., we have a group of three nodes with three loads from the same
3155 location in each node, and the vector size is 4. I.e., we have an
3156 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3157 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3158 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3161 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3162 The last mask is illegal since we assume two operands for permute
3163 operation, and the mask element values can't be outside that range.
3164 Hence, the last mask must be converted into {2,5,5,5}.
3165 For the first two permutations we need the first and the second input
3166 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3167 we need the second and the third vectors: {b1,c1,a2,b2} and
3168 {c2,a3,b3,c3}. */
3171 scalar_index = 0;
3172 index = 0;
3173 vect_stmts_counter = 0;
3174 vec_index = 0;
3175 first_vec_index = vec_index++;
3176 if (only_one_vec)
3177 second_vec_index = first_vec_index;
3178 else
3179 second_vec_index = vec_index++;
3181 for (j = 0; j < unroll_factor; j++)
3183 for (k = 0; k < group_size; k++)
3185 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3186 first_mask_element = i + j * group_size;
3187 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3188 nunits, only_one_vec, index,
3189 mask, &current_mask_element,
3190 &need_next_vector,
3191 &number_of_mask_fixes, &mask_fixed,
3192 &needs_first_vector))
3193 return false;
3194 gcc_assert (current_mask_element >= 0
3195 && current_mask_element < 2 * nunits);
3196 mask[index++] = current_mask_element;
3198 if (index == nunits)
3200 index = 0;
3201 if (!can_vec_perm_p (mode, false, mask))
3203 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3206 vect_location,
3207 "unsupported vect permute { ");
3208 for (i = 0; i < nunits; ++i)
3209 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3210 mask[i]);
3211 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3213 return false;
3216 if (!analyze_only)
3218 int l;
3219 tree mask_vec, *mask_elts;
3220 mask_elts = XALLOCAVEC (tree, nunits);
3221 for (l = 0; l < nunits; ++l)
3222 mask_elts[l] = build_int_cst (mask_element_type,
3223 mask[l]);
3224 mask_vec = build_vector (mask_type, mask_elts);
3226 if (need_next_vector)
3228 first_vec_index = second_vec_index;
3229 second_vec_index = vec_index;
3232 next_scalar_stmt
3233 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3235 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3236 mask_vec, first_vec_index, second_vec_index,
3237 gsi, node, vectype, dr_chain,
3238 ncopies, vect_stmts_counter++);
3245 return true;
3250 /* Vectorize SLP instance tree in postorder. */
3252 static bool
3253 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3254 unsigned int vectorization_factor)
3256 gimple stmt;
3257 bool grouped_store, is_store;
3258 gimple_stmt_iterator si;
3259 stmt_vec_info stmt_info;
3260 unsigned int vec_stmts_size, nunits, group_size;
3261 tree vectype;
3262 int i;
3263 slp_tree child;
3265 if (!node)
3266 return false;
3268 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3269 vect_schedule_slp_instance (child, instance, vectorization_factor);
3271 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3272 stmt_info = vinfo_for_stmt (stmt);
3274 /* VECTYPE is the type of the destination. */
3275 vectype = STMT_VINFO_VECTYPE (stmt_info);
3276 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3277 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3279 /* For each SLP instance calculate number of vector stmts to be created
3280 for the scalar stmts in each node of the SLP tree. Number of vector
3281 elements in one vector iteration is the number of scalar elements in
3282 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3283 size. */
3284 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3286 if (!SLP_TREE_VEC_STMTS (node).exists ())
3288 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3289 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3292 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE, vect_location,
3295 "------>vectorizing SLP node starting from: ");
3296 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3297 dump_printf (MSG_NOTE, "\n");
3300 /* Vectorized stmts go before the last scalar stmt which is where
3301 all uses are ready. */
3302 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3304 /* Mark the first element of the reduction chain as reduction to properly
3305 transform the node. In the analysis phase only the last element of the
3306 chain is marked as reduction. */
3307 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3308 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3310 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3311 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3314 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3315 return is_store;
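/* A worked example with made-up numbers: for vectorization_factor == 4,
   GROUP_SIZE == 2 and nunits == 4, vec_stmts_size == 4 * 2 / 4 == 2:
   the node's two scalar stmts over four unrolled iterations yield eight
   scalar results, i.e. two 4-element vector stmts.  */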
3318 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3319 For loop vectorization this is done in vectorizable_call, but for SLP
3320 it needs to be deferred until the end of vect_schedule_slp, because multiple
3321 SLP instances may refer to the same scalar stmt. */
3323 static void
3324 vect_remove_slp_scalar_calls (slp_tree node)
3326 gimple stmt, new_stmt;
3327 gimple_stmt_iterator gsi;
3328 int i;
3329 slp_tree child;
3330 tree lhs;
3331 stmt_vec_info stmt_info;
3333 if (!node)
3334 return;
3336 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3337 vect_remove_slp_scalar_calls (child);
3339 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3341 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3342 continue;
3343 stmt_info = vinfo_for_stmt (stmt);
3344 if (stmt_info == NULL
3345 || is_pattern_stmt_p (stmt_info)
3346 || !PURE_SLP_STMT (stmt_info))
3347 continue;
3348 lhs = gimple_call_lhs (stmt);
3349 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3350 set_vinfo_for_stmt (new_stmt, stmt_info);
3351 set_vinfo_for_stmt (stmt, NULL);
3352 STMT_VINFO_STMT (stmt_info) = new_stmt;
3353 gsi = gsi_for_stmt (stmt);
3354 gsi_replace (&gsi, new_stmt, false);
3355 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
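/* A sketch of the replacement (hypothetical call and SSA names): a
   pure-SLP scalar call

     x_1 = foo (a_2);

   is rewritten as

     x_1 = 0.0;   (a zero constant of x_1's type)

   which keeps x_1's SSA definition valid until DCE deletes it, while
   the vectorized stmts produced for the SLP node carry the real
   values.  */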
3359 /* Generate vector code for all SLP instances in the loop/basic block. */
3361 bool
3362 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3364 vec<slp_instance> slp_instances;
3365 slp_instance instance;
3366 unsigned int i, vf;
3367 bool is_store = false;
3369 if (loop_vinfo)
3371 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3372 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3374 else
3376 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3377 vf = 1;
3380 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3382 /* Schedule the tree of INSTANCE. */
3383 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3384 instance, vf);
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE, vect_location,
3387 "vectorizing stmts using SLP.\n");
3390 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3392 slp_tree root = SLP_INSTANCE_TREE (instance);
3393 gimple store;
3394 unsigned int j;
3395 gimple_stmt_iterator gsi;
3397 /* Remove scalar call stmts. Do not do this for basic-block
3398 vectorization as not all uses may be vectorized.
3399 ??? Why should this be necessary? DCE should be able to
3400 remove the stmts itself.
3401 ??? For BB vectorization we can as well remove scalar
3402 stmts starting from the SLP tree root if they have no
3403 uses. */
3404 if (loop_vinfo)
3405 vect_remove_slp_scalar_calls (root);
3407 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3408 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3410 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3411 break;
3413 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3414 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3415 /* Free the attached stmt_vec_info and remove the stmt. */
3416 gsi = gsi_for_stmt (store);
3417 unlink_stmt_vdef (store);
3418 gsi_remove (&gsi, true);
3419 release_defs (store);
3420 free_stmt_vec_info (store);
3424 return is_store;
3428 /* Vectorize the basic block. */
3430 void
3431 vect_slp_transform_bb (basic_block bb)
3433 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3434 gimple_stmt_iterator si;
3436 gcc_assert (bb_vinfo);
3438 if (dump_enabled_p ())
3439 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3441 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3443 gimple stmt = gsi_stmt (si);
3444 stmt_vec_info stmt_info;
3446 if (dump_enabled_p ())
3448 dump_printf_loc (MSG_NOTE, vect_location,
3449 "------>SLPing statement: ");
3450 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3451 dump_printf (MSG_NOTE, "\n");
3454 stmt_info = vinfo_for_stmt (stmt);
3455 gcc_assert (stmt_info);
3457 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3458 if (STMT_SLP_TYPE (stmt_info))
3460 vect_schedule_slp (NULL, bb_vinfo);
3461 break;
3465 if (dump_enabled_p ())
3466 dump_printf_loc (MSG_NOTE, vect_location,
3467 "BASIC BLOCK VECTORIZED\n");
3469 destroy_bb_vec_info (bb_vinfo);