PR target/65871
[official-gcc.git] / gcc / tree-vect-slp.c
blob 2b8f9bde5ae9d062bbcb031feae55e92fc8d7336

/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds, and UNKNOWN_LOCATION
   otherwise.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}

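/* Editorial illustration (not part of the original sources): for a plain
   binary statement such as

     x = a_1 + b_1;

   gimple_num_ops is 3, so nops is 2.  For a conditional statement

     x = a_1 < b_1 ? c_1 : d_1;

   the COND_EXPR condition a_1 < b_1 is itself built from two operands,
   so the nops++ above yields four operand slots: a_1, b_1, c_1 and d_1.  */
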
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */

static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

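/* Editorial illustration (an assumption-laden sketch, not from the original
   sources): for an interleaving chain built from the contiguous stores

     a[0] = w;  a[1] = x;  a[2] = y;  a[3] = z;

   with FIRST_STMT being the store to a[0], the stores sit at places 0, 1,
   2 and 3 in the chain, and a stmt from a different chain yields -1.  The
   GROUP_GAP accumulation above accounts for missing chain members, so if
   the store to a[1] were absent, the store to a[2] would still be at
   place 2 rather than 1.  */
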
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1; if the error could be corrected by swapping operands of the
   operation return 1; if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}

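/* Editorial note (an illustrative sketch, not from the original sources):
   for a commutative operation, if the definition kind of operand 0 does
   not match that of the first stmt's operand 0 (say, a constant where the
   first stmt had an internal def), the loop above retries with the two
   operands read in swapped order, returning 1 only if the swap does not
   help; when the swapped order does match, the GIMPLE operands of STMT
   are physically swapped at the end so later code sees a canonical
   operand order.  */
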
/* Verify whether the scalar stmts STMTS are isomorphic to one another and
   thus can be vectorized together.  Return true if they are; otherwise
   return false and record in *MATCHES which stmts are not isomorphic to
   the first one.  If MATCHES[0] is false, the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ??? We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                        && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                       /* If the group is split up then GROUP_GAP
                          isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
                       || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

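/* Editorial illustration (not part of the original sources): the four
   scalar stmts

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   are isomorphic (same PLUS_EXPR operation, same types), so MATCHES
   becomes { true, true, true, true }.  If stmt 2 instead used a
   MINUS_EXPR, matches[2] would stay false while matches[0] stayed true,
   a per-stmt mismatch; a fatal problem such as an unvectorizable stmt
   clears matches[0] instead.  */
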
/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise,
   return true.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple stmt;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                           stmt, (i == 0), &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (bb_vinfo
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          SLP_TREE_CHILDREN (*node).quick_push (NULL);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of not successful permutes was not
             more than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  */
              for (j = 0; j < group_size; ++j)
                if (!matches[j])
                  {
                    gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                  }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}

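/* Editorial note (an illustrative sketch, not from the original sources):
   in basic-block SLP, when a child node cannot be built, e.g. because in

     x0 = a0 + b0;      x1 = a1 + b1;

   the defs of a0 and a1 are not isomorphic, the code above pushes a NULL
   child and the vector operand is later constructed from the scalar defs
   a0/a1; the cost model then decides whether the resulting vec_construct
   is still profitable.  */
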
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

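/* Editorial illustration (not part of the original sources): with
   GROUP_SIZE 4 and PERMUTATION { 2, 0, 1, 3 }, the stmt at index i moves
   to index permutation[i], so the stmts { s0, s1, s2, s3 } are rearranged
   into { s1, s2, s0, s3 } in this node and, via the recursion above, in
   every node of the tree.  */
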
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ??? This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}

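/* Editorial illustration (not part of the original sources): for the
   final, non-reduction loop-SLP case above, a supported permutation with
   GROUP_SIZE 2 is "0 0 1 1": two load nodes, one reading only element 0
   of the interleaving chain and the other only element 1, i.e. each node
   repeats a single distinct element GROUP_SIZE times, which the sbitmap
   checks above verify.  */
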
/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple last = NULL, stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (child)
      vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                               instance, child, prologue_cost_vec,
                               ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ??? Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

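/* Editorial arithmetic check (not part of the original sources): with
   NUNITS 4 and GROUP_SIZE 8, ncopies_for_cost is lcm (4, 8) / 4 = 2
   vector stmts per scalar group; with NUNITS 4 and GROUP_SIZE 2 it is
   lcm (4, 2) / 4 = 1, matching the comment above.  */
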
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, matches, &npermutes, NULL,
                           max_tree_size))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              vect_free_slp_instance (new_instance);
              return false;
            }
        }

      if (loop_vinfo)
        {
          /* Compute the costs of this SLP instance.  Delay this for BB
             vectorization as we don't have vector types computed yet.  */
          vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                                 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
          LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
        }
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}

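/* Editorial arithmetic check (not part of the original sources): the
   unrolling factor is lcm (nunits, group_size) / group_size, so a store
   group of 2 elements with 4-element vectors gives lcm (4, 2) / 2 = 2,
   i.e. two copies of the group are needed to fill one vector; in
   basic-block SLP any factor other than 1 is rejected above.  */
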
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                  unsigned max_tree_size)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
                                   max_tree_size))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
                                       max_tree_size))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
                                    max_tree_size))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate
   the overall unrolling factor needed to SLP the loop.  Return TRUE if
   decided to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
          if (gimple_bb (use_stmt)
              && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
              && (use_vinfo = vinfo_for_stmt (use_stmt))
              && !STMT_SLP_TYPE (use_vinfo)
              && (STMT_VINFO_RELEVANT (use_vinfo)
                  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))
                  || (STMT_VINFO_IN_PATTERN_P (use_vinfo)
                      && STMT_VINFO_RELATED_STMT (use_vinfo)
                      && !STMT_SLP_TYPE (vinfo_for_stmt
                            (STMT_VINFO_RELATED_STMT (use_vinfo)))))
              && !(gimple_code (use_stmt) == GIMPLE_PHI
                   && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
            stype = hybrid;
    }

  if (stype == hybrid)
    STMT_SLP_TYPE (stmt_vinfo) = hybrid;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (child)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}

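/* Editorial illustration (not part of the original sources): if a pure
   SLP stmt  x = a + b  also feeds a scalar use such as  if (x > 0)  that
   is not part of any SLP instance but is relevant to the loop, x's
   definition must be available both as a vector lane and as a scalar
   value, so the stmt is marked hybrid here and the mark is propagated
   down its SLP subtree.  */
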
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *)data;
  struct loop *loopp = (struct loop *)wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
          && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
        STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
                          walk_stmt_info *)
{
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
    *handled = true;
  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
                     "\n");

  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              walk_stmt_info wi;
              memset (&wi, 0, sizeof (wi));
              wi.info = LOOP_VINFO_LOOP (loop_vinfo);
              gimple_stmt_iterator gsi2
                = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
              walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
                                vect_detect_hybrid_slp_1, &wi);
              walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                               vect_detect_hybrid_slp_2,
                               vect_detect_hybrid_slp_1, &wi);
            }
        }
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
        vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
                                      i, pure_slp);
    }
}

2014 /* Create and initialize a new bb_vec_info struct for BB, as well as
2015 stmt_vec_info structs for all the stmts in it. */
2017 static bb_vec_info
2018 new_bb_vec_info (basic_block bb)
2020 bb_vec_info res = NULL;
2021 gimple_stmt_iterator gsi;
2023 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2024 BB_VINFO_BB (res) = bb;
2026 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2028 gimple stmt = gsi_stmt (gsi);
2029 gimple_set_uid (stmt, 0);
2030 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2033 BB_VINFO_GROUPED_STORES (res).create (10);
2034 BB_VINFO_SLP_INSTANCES (res).create (2);
2035 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2037 bb->aux = res;
2038 return res;
2042 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2043 stmts in the basic block. */
2045 static void
2046 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2048 vec<slp_instance> slp_instances;
2049 slp_instance instance;
2050 basic_block bb;
2051 gimple_stmt_iterator si;
2052 unsigned i;
2054 if (!bb_vinfo)
2055 return;
2057 bb = BB_VINFO_BB (bb_vinfo);
2059 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2061 gimple stmt = gsi_stmt (si);
2062 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2064 if (stmt_info)
2065 /* Free stmt_vec_info. */
2066 free_stmt_vec_info (stmt);
2069 vect_destroy_datarefs (NULL, bb_vinfo);
2070 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2071 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2072 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2073 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2074 vect_free_slp_instance (instance);
2075 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2076 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2077 free (bb_vinfo);
2078 bb->aux = NULL;
2082 /* Analyze statements contained in SLP tree node after recursively analyzing
2083 the subtree. Return TRUE if the operations are supported. */
2085 static bool
2086 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
2088 bool dummy;
2089 int i;
2090 gimple stmt;
2091 slp_tree child;
2093 if (!node)
2094 return true;
2096 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2097 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
2098 return false;
2100 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2102 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2103 gcc_assert (stmt_info);
2104 gcc_assert (PURE_SLP_STMT (stmt_info));
2106 if (!vect_analyze_stmt (stmt, &dummy, node))
2107 return false;
2110 return true;
2114 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2115 operations are supported. */
2117 static bool
2118 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2120 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2121 slp_instance instance;
2122 int i;
2124 for (i = 0; slp_instances.iterate (i, &instance); )
2126 if (!vect_slp_analyze_node_operations (bb_vinfo,
2127 SLP_INSTANCE_TREE (instance)))
2129 vect_free_slp_instance (instance);
2130 slp_instances.ordered_remove (i);
2132 else
2133 i++;
2136 if (!slp_instances.length ())
2137 return false;
2139 return true;
2143 /* Compute the scalar cost of the SLP node NODE and its children
2144 and return it. Do not account stmts whose defs are marked in LIFE,
2145 and update LIFE according to the uses of NODE. */
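/* As a concrete illustration: for an SLP group of four scalar stores,
   none of which is marked in LIFE, this returns
   4 * vect_get_stmt_cost (scalar_store) plus the recursively computed
   cost of the nodes feeding the stored values. */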
2147 static unsigned
2148 vect_bb_slp_scalar_cost (basic_block bb,
2149 slp_tree node, vec<bool, va_heap> *life)
2151 unsigned scalar_cost = 0;
2152 unsigned i;
2153 gimple stmt;
2154 slp_tree child;
2156 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2158 unsigned stmt_cost;
2159 ssa_op_iter op_iter;
2160 def_operand_p def_p;
2161 stmt_vec_info stmt_info;
2163 if ((*life)[i])
2164 continue;
2166 /* If there is a non-vectorized use of the defs, the scalar
2167 stmt is kept live, in which case we do not account it or any
2168 required defs in the SLP children in the scalar cost. This
2169 way we make the vectorization more costly when compared to
2170 the scalar cost. */
2171 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2173 imm_use_iterator use_iter;
2174 gimple use_stmt;
2175 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2176 if (!is_gimple_debug (use_stmt)
2177 && (gimple_code (use_stmt) == GIMPLE_PHI
2178 || gimple_bb (use_stmt) != bb
2179 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2181 (*life)[i] = true;
2182 BREAK_FROM_IMM_USE_STMT (use_iter);
2185 if ((*life)[i])
2186 continue;
2188 stmt_info = vinfo_for_stmt (stmt);
2189 if (STMT_VINFO_DATA_REF (stmt_info))
2191 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2192 stmt_cost = vect_get_stmt_cost (scalar_load);
2193 else
2194 stmt_cost = vect_get_stmt_cost (scalar_store);
2196 else
2197 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2199 scalar_cost += stmt_cost;
2202 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2203 if (child)
2204 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2206 return scalar_cost;
2209 /* Check if vectorization of the basic block is profitable. */
2211 static bool
2212 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2214 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2215 slp_instance instance;
2216 int i, j;
2217 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2218 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2219 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2220 stmt_vec_info stmt_info = NULL;
2221 stmt_vector_for_cost body_cost_vec;
2222 stmt_info_for_cost *ci;
2224 /* Calculate vector costs. */
2225 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2227 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2229 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2231 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2232 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2233 stmt_info, ci->misalign, vect_body);
2237 /* Calculate scalar cost. */
2238 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2240 auto_vec<bool, 20> life;
2241 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2242 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2243 SLP_INSTANCE_TREE (instance),
2244 &life);
2247 /* Complete the target-specific cost calculation. */
2248 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2249 &vec_inside_cost, &vec_epilogue_cost);
2251 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2253 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2256 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2257 vec_inside_cost);
2258 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2259 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2260 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2263 /* Vectorization is profitable if its cost is less than the cost of the
2264 scalar version. */
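/* Illustrative numbers only: with vec_inside_cost 6, vec_prologue_cost 2,
   vec_epilogue_cost 1 and scalar_cost 10 we get 6 + (2 + 1) = 9 < 10,
   so vectorizing the block is considered profitable. */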
2265 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2266 return false;
2268 return true;
2271 /* Check if the basic block can be vectorized. */
2273 static bb_vec_info
2274 vect_slp_analyze_bb_1 (basic_block bb)
2276 bb_vec_info bb_vinfo;
2277 vec<slp_instance> slp_instances;
2278 slp_instance instance;
2279 int i;
2280 int min_vf = 2;
2281 unsigned n_stmts = 0;
2283 bb_vinfo = new_bb_vec_info (bb);
2284 if (!bb_vinfo)
2285 return NULL;
2287 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2291 "not vectorized: unhandled data-ref in basic "
2292 "block.\n");
2294 destroy_bb_vec_info (bb_vinfo);
2295 return NULL;
2298 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "not vectorized: not enough data-refs in "
2303 "basic block.\n");
2305 destroy_bb_vec_info (bb_vinfo);
2306 return NULL;
2309 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2311 if (dump_enabled_p ())
2312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2313 "not vectorized: unhandled data access in "
2314 "basic block.\n");
2316 destroy_bb_vec_info (bb_vinfo);
2317 return NULL;
2320 vect_pattern_recog (NULL, bb_vinfo);
2322 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2326 "not vectorized: bad data alignment in basic "
2327 "block.\n");
2329 destroy_bb_vec_info (bb_vinfo);
2330 return NULL;
2333 /* Check the SLP opportunities in the basic block, analyze and build SLP
2334 trees. */
2335 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2339 "not vectorized: failed to find SLP opportunities "
2340 "in basic block.\n");
2342 destroy_bb_vec_info (bb_vinfo);
2343 return NULL;
2346 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2348 /* Mark all the statements that we want to vectorize as pure SLP and
2349 relevant. */
2350 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2352 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2353 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2356 /* Mark all the statements that we do not want to vectorize. */
2357 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2358 !gsi_end_p (gsi); gsi_next (&gsi))
2360 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2361 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2362 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2365 /* Analyze dependences. At this point all stmts not participating in
2366 vectorization have to be marked. Dependence analysis assumes
2367 that we either vectorize all SLP instances or none at all. */
2368 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2370 if (dump_enabled_p ())
2371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2372 "not vectorized: unhandled data dependence "
2373 "in basic block.\n");
2375 destroy_bb_vec_info (bb_vinfo);
2376 return NULL;
2379 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2381 if (dump_enabled_p ())
2382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2383 "not vectorized: unsupported alignment in basic "
2384 "block.\n");
2385 destroy_bb_vec_info (bb_vinfo);
2386 return NULL;
2389 if (!vect_slp_analyze_operations (bb_vinfo))
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2393 "not vectorized: bad operation in basic block.\n");
2395 destroy_bb_vec_info (bb_vinfo);
2396 return NULL;
2399 /* Compute the costs of the SLP instances. */
2400 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2402 gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
2403 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2404 vect_analyze_slp_cost (NULL, bb_vinfo,
2405 instance, TYPE_VECTOR_SUBPARTS (vectype));
2408 /* Cost model: check if the vectorization is worthwhile. */
2409 if (!unlimited_cost_model (NULL)
2410 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 "not vectorized: vectorization is not "
2415 "profitable.\n");
2417 destroy_bb_vec_info (bb_vinfo);
2418 return NULL;
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_NOTE, vect_location,
2423 "Basic block will be vectorized using SLP\n");
2425 return bb_vinfo;
2429 bb_vec_info
2430 vect_slp_analyze_bb (basic_block bb)
2432 bb_vec_info bb_vinfo;
2433 int insns = 0;
2434 gimple_stmt_iterator gsi;
2435 unsigned int vector_sizes;
2437 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2440 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2442 gimple stmt = gsi_stmt (gsi);
2443 if (!is_gimple_debug (stmt)
2444 && !gimple_nop_p (stmt)
2445 && gimple_code (stmt) != GIMPLE_LABEL)
2446 insns++;
2449 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "not vectorized: too many instructions in "
2454 "basic block.\n");
2456 return NULL;
2459 /* Autodetect first vector size we try. */
2460 current_vector_size = 0;
2461 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2463 while (1)
2465 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2466 if (bb_vinfo)
2467 return bb_vinfo;
2469 destroy_bb_vec_info (bb_vinfo);
2471 vector_sizes &= ~current_vector_size;
2472 if (vector_sizes == 0
2473 || current_vector_size == 0)
2474 return NULL;
2476 /* Try the next biggest vector size. */
2477 current_vector_size = 1 << floor_log2 (vector_sizes);
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_NOTE, vect_location,
2480 "***** Re-trying analysis with "
2481 "vector size %d\n", current_vector_size);
2486 /* SLP costs are calculated according to the SLP instance unrolling factor
2487 (i.e., the number of created vector stmts depends on the unrolling factor).
2488 However, the actual number of vector stmts for every SLP node depends on
2489 the VF, which is set later in vect_analyze_operations (). Hence, the SLP
2490 costs should be updated. In this function we assume that the inside costs
2491 calculated in vect_model_xxx_cost are linear in ncopies. */
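/* Illustrative numbers: with VF 8 and an SLP instance unrolling factor
   of 2, ncopies is 8 / 2 = 4, so each cost entry recorded for the
   instance body is added to the target cost data with its count
   multiplied by 4. */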
2493 void
2494 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2496 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2497 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2498 slp_instance instance;
2499 stmt_vector_for_cost body_cost_vec;
2500 stmt_info_for_cost *si;
2501 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_NOTE, vect_location,
2505 "=== vect_update_slp_costs_according_to_vf ===\n");
2507 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2509 /* We assume that costs are linear in ncopies. */
2510 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2512 /* Record the instance's instructions in the target cost model.
2513 This was delayed until here because the count of instructions
2514 isn't known beforehand. */
2515 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2517 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2518 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2519 vinfo_for_stmt (si->stmt), si->misalign,
2520 vect_body);
2525 /* For constant and loop-invariant defs of SLP_NODE this function returns
2526 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2527 OP_NUM determines whether we gather defs for operand 0 or operand 1 of the
2528 RHS of the scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to
2529 create. REDUC_INDEX is the index of the reduction operand in the
2530 statements, unless it is -1. */
2532 static void
2533 vect_get_constant_vectors (tree op, slp_tree slp_node,
2534 vec<tree> *vec_oprnds,
2535 unsigned int op_num, unsigned int number_of_vectors,
2536 int reduc_index)
2538 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2539 gimple stmt = stmts[0];
2540 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2541 unsigned nunits;
2542 tree vec_cst;
2543 tree *elts;
2544 unsigned j, number_of_places_left_in_vector;
2545 tree vector_type;
2546 tree vop;
2547 int group_size = stmts.length ();
2548 unsigned int vec_num, i;
2549 unsigned number_of_copies = 1;
2550 vec<tree> voprnds;
2551 voprnds.create (number_of_vectors);
2552 bool constant_p, is_store;
2553 tree neutral_op = NULL;
2554 enum tree_code code = gimple_expr_code (stmt);
2555 gimple def_stmt;
2556 struct loop *loop;
2557 gimple_seq ctor_seq = NULL;
2559 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2560 && reduc_index != -1)
2562 op_num = reduc_index - 1;
2563 op = gimple_op (stmt, reduc_index);
2564 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2565 we need either neutral operands or the original operands. See
2566 get_initial_def_for_reduction() for details. */
2567 switch (code)
2569 case WIDEN_SUM_EXPR:
2570 case DOT_PROD_EXPR:
2571 case PLUS_EXPR:
2572 case MINUS_EXPR:
2573 case BIT_IOR_EXPR:
2574 case BIT_XOR_EXPR:
2575 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2576 neutral_op = build_real (TREE_TYPE (op), dconst0);
2577 else
2578 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2580 break;
2582 case MULT_EXPR:
2583 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2584 neutral_op = build_real (TREE_TYPE (op), dconst1);
2585 else
2586 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2588 break;
2590 case BIT_AND_EXPR:
2591 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2592 break;
2594 /* For MIN/MAX we don't have an easy neutral operand, but
2595 the initial values can be used fine here. Only for
2596 a reduction chain do we have to force a neutral element. */
2597 case MAX_EXPR:
2598 case MIN_EXPR:
2599 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2600 neutral_op = NULL;
2601 else
2603 def_stmt = SSA_NAME_DEF_STMT (op);
2604 loop = (gimple_bb (stmt))->loop_father;
2605 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2606 loop_preheader_edge (loop));
2608 break;
2610 default:
2611 neutral_op = NULL;
2615 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2617 is_store = true;
2618 op = gimple_assign_rhs1 (stmt);
2620 else
2621 is_store = false;
2623 gcc_assert (op);
2625 if (CONSTANT_CLASS_P (op))
2626 constant_p = true;
2627 else
2628 constant_p = false;
2630 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2631 gcc_assert (vector_type);
2632 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2634 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2635 created vectors. It is greater than 1 if unrolling is performed.
2637 For example, we have two scalar operands, s1 and s2 (e.g., group of
2638 strided accesses of size two), while NUNITS is four (i.e., four scalars
2639 of this type can be packed in a vector). The output vector will contain
2640 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2641 will be 2).
2643 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2644 containing the operands.
2646 For example, NUNITS is four as before, and the group size is 8
2647 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2648 {s5, s6, s7, s8}. */
2650 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
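/* For the two examples in the comment above: least_common_multiple (4, 2)
   / 2 == 2 copies, and least_common_multiple (4, 8) / 8 == 1 copy. */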
2652 number_of_places_left_in_vector = nunits;
2653 elts = XALLOCAVEC (tree, nunits);
2654 bool place_after_defs = false;
2655 for (j = 0; j < number_of_copies; j++)
2657 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2659 if (is_store)
2660 op = gimple_assign_rhs1 (stmt);
2661 else
2663 switch (code)
2665 case COND_EXPR:
2666 if (op_num == 0 || op_num == 1)
2668 tree cond = gimple_assign_rhs1 (stmt);
2669 op = TREE_OPERAND (cond, op_num);
2671 else
2673 if (op_num == 2)
2674 op = gimple_assign_rhs2 (stmt);
2675 else
2676 op = gimple_assign_rhs3 (stmt);
2678 break;
2680 case CALL_EXPR:
2681 op = gimple_call_arg (stmt, op_num);
2682 break;
2684 case LSHIFT_EXPR:
2685 case RSHIFT_EXPR:
2686 case LROTATE_EXPR:
2687 case RROTATE_EXPR:
2688 op = gimple_op (stmt, op_num + 1);
2689 /* Unlike the other binary operators, shifts/rotates have
2690 an int shift count instead of one of the same type as
2691 the lhs, so make sure the scalar is the right type if
2692 we are dealing with vectors of
2693 long long/long/short/char. */
2694 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2695 op = fold_convert (TREE_TYPE (vector_type), op);
2696 break;
2698 default:
2699 op = gimple_op (stmt, op_num + 1);
2700 break;
2704 if (reduc_index != -1)
2706 loop = (gimple_bb (stmt))->loop_father;
2707 def_stmt = SSA_NAME_DEF_STMT (op);
2709 gcc_assert (loop);
2711 /* Get the def before the loop. In a reduction chain we have
2712 only one initial value. */
2713 if ((j != (number_of_copies - 1)
2714 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2715 && i != 0))
2716 && neutral_op)
2717 op = neutral_op;
2718 else
2719 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2720 loop_preheader_edge (loop));
2723 /* Create 'vect_ = {op0,op1,...,opn}'. */
2724 number_of_places_left_in_vector--;
2725 tree orig_op = op;
2726 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2728 if (CONSTANT_CLASS_P (op))
2730 op = fold_unary (VIEW_CONVERT_EXPR,
2731 TREE_TYPE (vector_type), op);
2732 gcc_assert (op && CONSTANT_CLASS_P (op));
2734 else
2736 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2737 gimple init_stmt;
2738 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2739 init_stmt
2740 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2741 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2742 op = new_temp;
2745 elts[number_of_places_left_in_vector] = op;
2746 if (!CONSTANT_CLASS_P (op))
2747 constant_p = false;
2748 if (TREE_CODE (orig_op) == SSA_NAME
2749 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2750 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2751 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2752 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2753 place_after_defs = true;
2755 if (number_of_places_left_in_vector == 0)
2757 number_of_places_left_in_vector = nunits;
2759 if (constant_p)
2760 vec_cst = build_vector (vector_type, elts);
2761 else
2763 vec<constructor_elt, va_gc> *v;
2764 unsigned k;
2765 vec_alloc (v, nunits);
2766 for (k = 0; k < nunits; ++k)
2767 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2768 vec_cst = build_constructor (vector_type, v);
2770 tree init;
2771 gimple_stmt_iterator gsi;
2772 if (place_after_defs)
2774 gsi = gsi_for_stmt
2775 (vect_find_last_scalar_stmt_in_slp (slp_node));
2776 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2778 else
2779 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2780 if (ctor_seq != NULL)
2782 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2783 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2784 GSI_SAME_STMT);
2785 ctor_seq = NULL;
2787 voprnds.quick_push (init);
2788 place_after_defs = false;
2793 /* Since the vectors are created in reverse order, we have to reverse
2794 them here. */
2795 vec_num = voprnds.length ();
2796 for (j = vec_num; j != 0; j--)
2798 vop = voprnds[j - 1];
2799 vec_oprnds->quick_push (vop);
2802 voprnds.release ();
2804 /* If VF is greater than the unrolling factor needed for the SLP group
2805 of stmts, the NUMBER_OF_VECTORS to be created is greater than
2806 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2807 to replicate the vectors. */
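/* Sketch of the replication below: if two vector defs were created but
   NUMBER_OF_VECTORS is 4, we either push two copies of the neutral
   vector (when a neutral operand exists for the reduction) or re-push
   the existing defs, turning {v0, v1} into {v0, v1, v0, v1}. */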
2808 while (number_of_vectors > vec_oprnds->length ())
2810 tree neutral_vec = NULL;
2812 if (neutral_op)
2814 if (!neutral_vec)
2815 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2817 vec_oprnds->quick_push (neutral_vec);
2819 else
2821 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2822 vec_oprnds->quick_push (vop);
2828 /* Get vectorized definitions from SLP_NODE that contains corresponding
2829 vectorized def-stmts. */
2831 static void
2832 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2834 tree vec_oprnd;
2835 gimple vec_def_stmt;
2836 unsigned int i;
2838 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2840 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2842 gcc_assert (vec_def_stmt);
2843 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2844 vec_oprnds->quick_push (vec_oprnd);
2849 /* Get vectorized definitions for SLP_NODE.
2850 If the scalar definitions are loop invariants or constants, collect them and
2851 call vect_get_constant_vectors() to create vector stmts.
2852 Otherwise, the def-stmts must already be vectorized, their vectorized stmts
2853 must be stored in the corresponding child of SLP_NODE, and we call
2854 vect_get_slp_vect_defs () to retrieve them. */
2856 void
2857 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2858 vec<vec<tree> > *vec_oprnds, int reduc_index)
2860 gimple first_stmt;
2861 int number_of_vects = 0, i;
2862 unsigned int child_index = 0;
2863 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2864 slp_tree child = NULL;
2865 vec<tree> vec_defs;
2866 tree oprnd;
2867 bool vectorized_defs;
2869 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2870 FOR_EACH_VEC_ELT (ops, i, oprnd)
2872 /* For each operand we check if it has vectorized definitions in a child
2873 node or we need to create them (for invariants and constants). We
2874 check if the LHS of the first stmt of the next child matches OPRND.
2875 If it does, we have found the correct child. Otherwise, we call
2876 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2877 this child node is checked again for the next operand. */
2878 vectorized_defs = false;
2879 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2881 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2883 /* We have to check both pattern and original def, if available. */
2884 if (child)
2886 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2887 gimple related
2888 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2890 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2891 || (related
2892 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2894 /* The number of vector defs is determined by the number of
2895 vector statements in the node from which we get those
2896 statements. */
2897 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2898 vectorized_defs = true;
2899 child_index++;
2902 else
2903 child_index++;
2906 if (!vectorized_defs)
2908 if (i == 0)
2910 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2911 /* The number of vector stmts was calculated according to the LHS in
2912 vect_schedule_slp_instance (); fix it by replacing the LHS with the
2913 RHS, if necessary. See vect_get_smallest_scalar_type () for
2914 details. */
2915 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2916 &rhs_size_unit);
2917 if (rhs_size_unit != lhs_size_unit)
2919 number_of_vects *= rhs_size_unit;
2920 number_of_vects /= lhs_size_unit;
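/* E.g., if the LHS is a 2-byte short computed from 4-byte int operands
   (an illustrative case), only half as many ints fit in one vector, so
   twice as many operand vectors are needed: number_of_vects * 4 / 2. */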
2925 /* Allocate memory for vectorized defs. */
2926 vec_defs = vNULL;
2927 vec_defs.create (number_of_vects);
2929 /* For reduction defs we call vect_get_constant_vectors (), since we are
2930 looking for initial loop invariant values. */
2931 if (vectorized_defs && reduc_index == -1)
2932 /* The defs are already vectorized. */
2933 vect_get_slp_vect_defs (child, &vec_defs);
2934 else
2935 /* Build vectors from scalar defs. */
2936 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2937 number_of_vects, reduc_index);
2939 vec_oprnds->quick_push (vec_defs);
2941 /* For reductions, we only need initial values. */
2942 if (reduc_index != -1)
2943 return;
2948 /* Create NCOPIES permutation statements using the vector mask MASK (built
2949 by the caller from the mask bytes) and two input vectors placed in
2950 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2951 shifting by STRIDE elements of DR_CHAIN for every copy.
2952 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2953 copies).
2954 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2955 the created stmts must be inserted. */
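/* Illustration: with 4 vector stmts for NODE and NCOPIES 2, STRIDE is
   4 / 2 = 2; the first copy permutes DR_CHAIN[FIRST_VEC_INDX] and
   DR_CHAIN[SECOND_VEC_INDX], and the second copy permutes the elements
   located STRIDE = 2 positions further in DR_CHAIN. */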
2957 static inline void
2958 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2959 tree mask, int first_vec_indx, int second_vec_indx,
2960 gimple_stmt_iterator *gsi, slp_tree node,
2961 tree vectype, vec<tree> dr_chain,
2962 int ncopies, int vect_stmts_counter)
2964 tree perm_dest;
2965 gimple perm_stmt = NULL;
2966 stmt_vec_info next_stmt_info;
2967 int i, stride;
2968 tree first_vec, second_vec, data_ref;
2970 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2972 /* Initialize the vect stmts of NODE to properly insert the generated
2973 stmts later. */
2974 for (i = SLP_TREE_VEC_STMTS (node).length ();
2975 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2976 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2978 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2979 for (i = 0; i < ncopies; i++)
2981 first_vec = dr_chain[first_vec_indx];
2982 second_vec = dr_chain[second_vec_indx];
2984 /* Generate the permute statement. */
2985 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
2986 first_vec, second_vec, mask);
2987 data_ref = make_ssa_name (perm_dest, perm_stmt);
2988 gimple_set_lhs (perm_stmt, data_ref);
2989 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2991 /* Store the vector statement in NODE. */
2992 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2994 first_vec_indx += stride;
2995 second_vec_indx += stride;
2998 /* Mark the scalar stmt as vectorized. */
2999 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
3000 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
3004 /* Given FIRST_MASK_ELEMENT - the mask element in element representation -
3005 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
3006 representation. Check that the mask is valid and return FALSE if not.
3007 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
3008 the next vector, i.e., the current first vector is not needed. */
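/* For example (assuming no earlier mask fixes), with MASK_NUNITS 4,
   FIRST_MASK_ELEMENT 6 and M 0 the resulting element is 6: values in
   [0, 4) select from the first input vector and values in [4, 8) from
   the second, so element 6 is position 2 of the second vector. */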
3010 static bool
3011 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
3012 int mask_nunits, bool only_one_vec, int index,
3013 unsigned char *mask, int *current_mask_element,
3014 bool *need_next_vector, int *number_of_mask_fixes,
3015 bool *mask_fixed, bool *needs_first_vector)
3017 int i;
3019 /* Convert to target specific representation. */
3020 *current_mask_element = first_mask_element + m;
3021 /* Adjust the value in case it's a mask for the second and third vectors. */
3022 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3024 if (*current_mask_element < 0)
3026 if (dump_enabled_p ())
3028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3029 "permutation requires past vector ");
3030 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3031 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3033 return false;
3036 if (*current_mask_element < mask_nunits)
3037 *needs_first_vector = true;
3039 /* We have only one input vector to permute but the mask accesses values in
3040 the next vector as well. */
3041 if (only_one_vec && *current_mask_element >= mask_nunits)
3043 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3046 "permutation requires at least two vectors ");
3047 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3048 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3051 return false;
3054 /* The mask requires the next vector. */
3055 while (*current_mask_element >= mask_nunits * 2)
3057 if (*needs_first_vector || *mask_fixed)
3059 /* We either need the first vector too or have already moved to the
3060 next vector. In both cases, this permutation needs three
3061 vectors. */
3062 if (dump_enabled_p ())
3064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3065 "permutation requires at "
3066 "least three vectors ");
3067 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3068 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3071 return false;
3074 /* We move to the next vector, dropping the first one and working with
3075 the second and the third - we need to adjust the values of the mask
3076 accordingly. */
3077 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3079 for (i = 0; i < index; i++)
3080 mask[i] -= mask_nunits * *number_of_mask_fixes;
3082 (*number_of_mask_fixes)++;
3083 *mask_fixed = true;
3086 *need_next_vector = *mask_fixed;
3088 /* This was the last element of this mask. Start a new one. */
3089 if (index == mask_nunits - 1)
3091 *number_of_mask_fixes = 1;
3092 *mask_fixed = false;
3093 *needs_first_vector = false;
3096 return true;
3100 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3101 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3102 permute statements for the SLP node NODE of the SLP instance
3103 SLP_NODE_INSTANCE. */
3105 bool
3106 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3107 gimple_stmt_iterator *gsi, int vf,
3108 slp_instance slp_node_instance, bool analyze_only)
3110 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3111 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3112 tree mask_element_type = NULL_TREE, mask_type;
3113 int i, j, k, nunits, vec_index = 0, scalar_index;
3114 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3115 gimple next_scalar_stmt;
3116 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3117 int first_mask_element;
3118 int index, unroll_factor, current_mask_element, ncopies;
3119 unsigned char *mask;
3120 bool only_one_vec = false, need_next_vector = false;
3121 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3122 int number_of_mask_fixes = 1;
3123 bool mask_fixed = false;
3124 bool needs_first_vector = false;
3125 machine_mode mode;
3127 mode = TYPE_MODE (vectype);
3129 if (!can_vec_perm_p (mode, false, NULL))
3131 if (dump_enabled_p ())
3133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3134 "no vect permute for ");
3135 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3136 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3138 return false;
3141 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3142 same size as the vector element being permuted. */
3143 mask_element_type = lang_hooks.types.type_for_mode
3144 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3145 mask_type = get_vectype_for_scalar_type (mask_element_type);
3146 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3147 mask = XALLOCAVEC (unsigned char, nunits);
3148 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3150 /* The number of vector stmts to generate, based only on the
3151 SLP_NODE_INSTANCE unrolling factor. */
3152 orig_vec_stmts_num = group_size *
3153 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3154 if (orig_vec_stmts_num == 1)
3155 only_one_vec = true;
3157 /* The number of copies is determined by the final vectorization factor
3158 relative to the SLP_NODE_INSTANCE unrolling factor. */
3159 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
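/* Illustrative numbers: with GROUP_SIZE 3, an instance unrolling factor
   of 4 and NUNITS 4, ORIG_VEC_STMTS_NUM is 3 * 4 / 4 = 3 (so more than
   one input vector), and with VF 8 we get NCOPIES = 8 / 4 = 2. */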
3161 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3162 return false;
3164 /* Generate permutation masks for every NODE. The number of masks for each
3165 NODE is equal to GROUP_SIZE.
3166 E.g., we have a group of three nodes with three loads from the same
3167 location in each node, and the vector size is 4. I.e., we have an
3168 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3169 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3170 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3173 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3174 The last mask is illegal since we assume two operands for permute
3175 operation, and the mask element values can't be outside that range.
3176 Hence, the last mask must be converted into {2,5,5,5}.
3177 For the first two permutations we need the first and the second input
3178 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3179 we need the second and the third vectors: {b1,c1,a2,b2} and
3180 {c2,a3,b3,c3}. */
3183 scalar_index = 0;
3184 index = 0;
3185 vect_stmts_counter = 0;
3186 vec_index = 0;
3187 first_vec_index = vec_index++;
3188 if (only_one_vec)
3189 second_vec_index = first_vec_index;
3190 else
3191 second_vec_index = vec_index++;
3193 for (j = 0; j < unroll_factor; j++)
3195 for (k = 0; k < group_size; k++)
3197 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3198 first_mask_element = i + j * group_size;
3199 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3200 nunits, only_one_vec, index,
3201 mask, &current_mask_element,
3202 &need_next_vector,
3203 &number_of_mask_fixes, &mask_fixed,
3204 &needs_first_vector))
3205 return false;
3206 gcc_assert (current_mask_element >= 0
3207 && current_mask_element < 2 * nunits);
3208 mask[index++] = current_mask_element;
3210 if (index == nunits)
3212 index = 0;
3213 if (!can_vec_perm_p (mode, false, mask))
3215 if (dump_enabled_p ())
3217 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3218 vect_location,
3219 "unsupported vect permute { ");
3220 for (i = 0; i < nunits; ++i)
3221 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3222 mask[i]);
3223 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3225 return false;
3228 if (!analyze_only)
3230 int l;
3231 tree mask_vec, *mask_elts;
3232 mask_elts = XALLOCAVEC (tree, nunits);
3233 for (l = 0; l < nunits; ++l)
3234 mask_elts[l] = build_int_cst (mask_element_type,
3235 mask[l]);
3236 mask_vec = build_vector (mask_type, mask_elts);
3238 if (need_next_vector)
3240 first_vec_index = second_vec_index;
3241 second_vec_index = vec_index;
3244 next_scalar_stmt
3245 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3247 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3248 mask_vec, first_vec_index, second_vec_index,
3249 gsi, node, vectype, dr_chain,
3250 ncopies, vect_stmts_counter++);
3257 return true;
3262 /* Vectorize SLP instance tree in postorder. */
3264 static bool
3265 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3266 unsigned int vectorization_factor)
3268 gimple stmt;
3269 bool grouped_store, is_store;
3270 gimple_stmt_iterator si;
3271 stmt_vec_info stmt_info;
3272 unsigned int vec_stmts_size, nunits, group_size;
3273 tree vectype;
3274 int i;
3275 slp_tree child;
3277 if (!node)
3278 return false;
3280 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3281 vect_schedule_slp_instance (child, instance, vectorization_factor);
3283 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3284 stmt_info = vinfo_for_stmt (stmt);
3286 /* VECTYPE is the type of the destination. */
3287 vectype = STMT_VINFO_VECTYPE (stmt_info);
3288 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3289 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3291 /* For each SLP instance calculate the number of vector stmts to be created
3292 for the scalar stmts in each node of the SLP tree. The number of vector
3293 elements in one vector iteration is the number of scalar elements in
3294 one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
3295 size. */
3296 vec_stmts_size = (vectorization_factor * group_size) / nunits;
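/* E.g., with GROUP_SIZE 4, VECTORIZATION_FACTOR 4 and NUNITS 8, each
   node needs (4 * 4) / 8 = 2 vector stmts. */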
3298 if (!SLP_TREE_VEC_STMTS (node).exists ())
3300 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3301 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3304 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_NOTE, vect_location,
3307 "------>vectorizing SLP node starting from: ");
3308 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3309 dump_printf (MSG_NOTE, "\n");
3312 /* Vectorized stmts go before the last scalar stmt, which is where
3313 all uses are ready. */
3314 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3316 /* Mark the first element of the reduction chain as reduction to properly
3317 transform the node. In the analysis phase only the last element of the
3318 chain is marked as reduction. */
3319 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3320 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3322 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3323 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3326 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3327 return is_store;
3330 /* Replace scalar calls from SLP node NODE with assignments setting their
3331 lhs to zero. For loop vectorization this is done in vectorizable_call,
3332 but for SLP it needs to be deferred until the end of vect_schedule_slp,
3333 because multiple SLP instances may refer to the same scalar stmt. */
3335 static void
3336 vect_remove_slp_scalar_calls (slp_tree node)
3338 gimple stmt, new_stmt;
3339 gimple_stmt_iterator gsi;
3340 int i;
3341 slp_tree child;
3342 tree lhs;
3343 stmt_vec_info stmt_info;
3345 if (!node)
3346 return;
3348 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3349 vect_remove_slp_scalar_calls (child);
3351 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3353 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3354 continue;
3355 stmt_info = vinfo_for_stmt (stmt);
3356 if (stmt_info == NULL
3357 || is_pattern_stmt_p (stmt_info)
3358 || !PURE_SLP_STMT (stmt_info))
3359 continue;
3360 lhs = gimple_call_lhs (stmt);
3361 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3362 set_vinfo_for_stmt (new_stmt, stmt_info);
3363 set_vinfo_for_stmt (stmt, NULL);
3364 STMT_VINFO_STMT (stmt_info) = new_stmt;
3365 gsi = gsi_for_stmt (stmt);
3366 gsi_replace (&gsi, new_stmt, false);
3367 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3371 /* Generate vector code for all SLP instances in the loop/basic block. */
3373 bool
3374 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3376 vec<slp_instance> slp_instances;
3377 slp_instance instance;
3378 unsigned int i, vf;
3379 bool is_store = false;
3381 if (loop_vinfo)
3383 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3384 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3386 else
3388 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3389 vf = 1;
3392 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3394 /* Schedule the tree of INSTANCE. */
3395 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3396 instance, vf);
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_NOTE, vect_location,
3399 "vectorizing stmts using SLP.\n");
3402 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3404 slp_tree root = SLP_INSTANCE_TREE (instance);
3405 gimple store;
3406 unsigned int j;
3407 gimple_stmt_iterator gsi;
3409 /* Remove scalar call stmts. Do not do this for basic-block
3410 vectorization as not all uses may be vectorized.
3411 ??? Why should this be necessary? DCE should be able to
3412 remove the stmts itself.
3413 ??? For BB vectorization we can as well remove scalar
3414 stmts starting from the SLP tree root if they have no
3415 uses. */
3416 if (loop_vinfo)
3417 vect_remove_slp_scalar_calls (root);
3419 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3420 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3422 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3423 break;
3425 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3426 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3427 /* Free the attached stmt_vec_info and remove the stmt. */
3428 gsi = gsi_for_stmt (store);
3429 unlink_stmt_vdef (store);
3430 gsi_remove (&gsi, true);
3431 release_defs (store);
3432 free_stmt_vec_info (store);
3436 return is_store;
3440 /* Vectorize the basic block. */
3442 void
3443 vect_slp_transform_bb (basic_block bb)
3445 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3446 gimple_stmt_iterator si;
3448 gcc_assert (bb_vinfo);
3450 if (dump_enabled_p ())
3451 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3453 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3455 gimple stmt = gsi_stmt (si);
3456 stmt_vec_info stmt_info;
3458 if (dump_enabled_p ())
3460 dump_printf_loc (MSG_NOTE, vect_location,
3461 "------>SLPing statement: ");
3462 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3463 dump_printf (MSG_NOTE, "\n");
3466 stmt_info = vinfo_for_stmt (stmt);
3467 gcc_assert (stmt_info);
3469 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3470 if (STMT_SLP_TYPE (stmt_info))
3472 vect_schedule_slp (NULL, bb_vinfo);
3473 break;
3477 if (dump_enabled_p ())
3478 dump_printf_loc (MSG_NOTE, vect_location,
3479 "BASIC BLOCK VECTORIZED\n");
3481 destroy_bb_vec_info (bb_vinfo);