/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;

  return node;
}
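/* Note on the NOPS adjustment above: for a COND_EXPR the condition
   occupies a single rhs slot but contributes two operands (the two
   sides of the embedded comparison), hence the extra increment.  */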
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
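/* For illustration (a sketch, not from any testcase): given the SLP group
     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
   the slp_oprnd_info for operand 0 collects the def stmts of b[0] and
   b[1], while the one for operand 1 collects those of c[0] and c[1].  */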
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
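/* E.g. (sketch): the place of an element is the sum of the GROUP_GAPs up
   to it, so for a chain accessing a[0], a[2], a[3] (GROUP_GAP 2 and 1 for
   the second and third element) the places returned are 0, 2 and 3.  */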
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1, if the error could be corrected by swapping operands of the
   operation return 1, if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}
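/* Example of the swapping above (a sketch): in the group
     x0 = a[0] + b[0];
     x1 = b[1] + a[1];
   the operands of x1 do not line up with those of x0; because PLUS_EXPR
   is commutative the analysis retries with x1's operands swapped and, on
   success, commits the swap to the GIMPLE stmt at the end.  */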
/* Verify that the scalar stmts STMTS are isomorphic, do not require any
   data permutation, and are not of unsupported types of operation.
   Return true if so; otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */
static bool
vect_build_slp_tree_1 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (is_a <bb_vec_info> (vinfo))
            vectorization_factor = *max_nunits;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (is_a <loop_vec_info> (vinfo)
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
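/* E.g. (sketch): the two-operator path above accepts a group like
     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];
   both a vector add and a vector subtract are generated later and the
   wanted lanes are selected with a VEC_PERM_EXPR, hence the
   can_vec_perm_p check on the { 0, nunits + 1, ... } style mask.  */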
/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise
   return true, with *MATCHES indicating for which stmts of the group a
   mismatch was found.  */
static bool
vect_build_slp_tree (vec_info *vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple *stmt;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches,
                              &two_operators))
    return false;
  SLP_TREE_TWO_OPERATORS (*node) = two_operators;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (vect_build_slp_tree (vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ())
            {
              unsigned int j;
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (grandchild != NULL)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  *max_nunits = old_max_nunits;
                  loads->truncate (old_nloads);
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  vect_free_slp_tree (child);
                  SLP_TREE_CHILDREN (*node).quick_push (NULL);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          SLP_TREE_CHILDREN (*node).quick_push (NULL);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && !SLP_TREE_TWO_OPERATORS (*node)
          /* Do so only if the number of not successful permutes was not
             more than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  */
              for (j = 0; j < group_size; ++j)
                if (!matches[j])
                  {
                    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                  }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}
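/* E.g. (sketch): for the basic-block pair
     a[0] = *p + 1;
     a[1] = *q + 1;
   the child node { *p, *q } fails fatally (ungrouped loads), so the
   fallback above pushes a NULL child and the vector { *p, *q } is later
   built from the scalar results, leaving the cost model to judge it.  */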
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
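/* E.g. (sketch): with PERMUTATION = { 1, 0 } the stmt at index 0 moves
   to index 1 and vice versa in NODE and, via the recursion above, in
   every child node as well.  */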
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  sbitmap load_index;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    SLP_TREE_LOAD_PERMUTATION (node).release ();
  return true;
}
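/* Sketch of the cases this handles: for a reduction chain such as
     sum += a[2*i + 1];
     sum += a[2*i];
   the load node carries permutation { 1, 0 }; since summation order is
   irrelevant, the scalar stmts of all nodes are reordered to the load
   order and no runtime permutation remains.  */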
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load, *first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (vect_attempt_slp_rearrange_stmts (slp_instn))
        return true;

      /* Fallthru to general load permutation handling.  */
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ??? This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}
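/* E.g. (sketch): in basic-block SLP a node loading a[1], a[2] out of the
   interleaved chain a[0]..a[3] forms a subchain (consecutive elements,
   GROUP_GAP 1) and needs no permutation, whereas a node loading
   a[2], a[0] must pass the vect_transform_slp_perm_load dry-run check
   above.  */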
/* Find the last scalar stmt in the SLP group represented by NODE
   (for an instance rooted at a store group this is the last store).  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost)
{
  unsigned i;
  slp_tree child;
  gimple *stmt, *s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (child)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ??? Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
    }

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }
}
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
        {
        case DOT_PROD_EXPR:
        case SAD_EXPR:
          nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
                                           (TREE_TYPE (gimple_assign_rhs1 (stmt))));
          break;
        default:;
        }
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
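  /* E.g. (a sketch with made-up numbers): GROUP_SIZE 8, a loop
     vectorization factor of 2 and NUNITS 4 give an adjusted group size
     of 16 and ncopies_for_cost = lcm (4, 16) / 4 = 4 vector stmts.  */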
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,
                           ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
                           gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (is_a <loop_vec_info> (vinfo));
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (is_a <loop_vec_info> (vinfo))
    vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor;
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }
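  /* E.g. (sketch): storing a group of 2 ints with a V4SI vector type
     gives unrolling_factor = lcm (4, 2) / 2 = 2; a loop would simply be
     unrolled twice, while basic-block SLP was just rejected above.  */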
  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
      /* Mark the first element of the reduction chain as reduction to properly
         transform the node.  In the reduction analysis phase only the last
         element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  if (vect_build_slp_tree (vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, matches, &npermutes, NULL,
                           max_tree_size))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_LOADS (new_instance) = loads;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple *load, *first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted
              /* The load requires permutation when unrolling exposes
                 a gap either because the group is larger than the SLP
                 group-size or because there is a gap between the groups.  */
              && (unrolling_factor == 1
                  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
                      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
            {
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              vect_free_slp_instance (new_instance);
              return false;
            }
        }

      vinfo->slp_instances.safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
      ok = true;

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (vect_analyze_slp_instance (vinfo, first_element,
                                           max_tree_size))
              ok = true;
            else
              return false;

          /* Don't try to vectorize SLP reductions if reduction chain was
             detected.  */
          return ok;
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1
          && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
                                        max_tree_size))
        ok = true;
    }

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* We always get the pattern stmt here, but for immediate
         uses we have to use the LHS of the original stmt.  */
      gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
        stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
          {
            if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
              continue;
            use_vinfo = vinfo_for_stmt (use_stmt);
            if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
                && STMT_VINFO_RELATED_STMT (use_vinfo))
              use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
            if (!STMT_SLP_TYPE (use_vinfo)
                && (STMT_VINFO_RELEVANT (use_vinfo)
                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
                && !(gimple_code (use_stmt) == GIMPLE_PHI
                     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
                                     "def in non-SLP stmt: ");
                    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
                  }
                stype = hybrid;
              }
          }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (child)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}
1975 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
1977 static tree
1978 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
1980 walk_stmt_info *wi = (walk_stmt_info *)data;
1981 struct loop *loopp = (struct loop *)wi->info;
1983 if (wi->is_lhs)
1984 return NULL_TREE;
1986 if (TREE_CODE (*tp) == SSA_NAME
1987 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
1989 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
1990 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
1991 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
1993 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
1996 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1998 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2002 return NULL_TREE;
2005 static tree
2006 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2007 walk_stmt_info *)
/* If the stmt is in an SLP instance then this isn't a reason
to mark use definitions in other SLP instances as hybrid. */
2011 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2012 *handled = true;
2013 return NULL_TREE;
2016 /* Find stmts that must be both vectorized and SLPed. */
2018 void
2019 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2021 unsigned int i;
2022 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2023 slp_instance instance;
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2027 "\n");
/* First walk all pattern stmts in the loop and mark the defs of their
uses as hybrid, because immediate uses inside pattern stmts are not
recorded. */
2031 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2033 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2034 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2035 gsi_next (&gsi))
2037 gimple *stmt = gsi_stmt (gsi);
2038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2039 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2041 walk_stmt_info wi;
2042 memset (&wi, 0, sizeof (wi));
2043 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2044 gimple_stmt_iterator gsi2
2045 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2046 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2047 vect_detect_hybrid_slp_1, &wi);
2048 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2049 vect_detect_hybrid_slp_2,
2050 vect_detect_hybrid_slp_1, &wi);
/* Then walk the SLP instance trees, marking stmts with uses in
non-SLP stmts as hybrid and propagating hybrid down the
SLP tree, collecting the above info on the fly. */
2058 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2060 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2061 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2062 i, pure_slp);
2067 /* Create and initialize a new bb_vec_info struct for BB, as well as
2068 stmt_vec_info structs for all the stmts in it. */
2070 static bb_vec_info
2071 new_bb_vec_info (gimple_stmt_iterator region_begin,
2072 gimple_stmt_iterator region_end)
2074 basic_block bb = gsi_bb (region_begin);
2075 bb_vec_info res = NULL;
2076 gimple_stmt_iterator gsi;
2078 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2079 res->kind = vec_info::bb;
2080 BB_VINFO_BB (res) = bb;
2081 res->region_begin = region_begin;
2082 res->region_end = region_end;
2084 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2085 gsi_next (&gsi))
2087 gimple *stmt = gsi_stmt (gsi);
2088 gimple_set_uid (stmt, 0);
2089 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2092 BB_VINFO_GROUPED_STORES (res).create (10);
2093 BB_VINFO_SLP_INSTANCES (res).create (2);
2094 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2096 bb->aux = res;
2097 return res;
/* Free the BB_VINFO struct, as well as the stmt_vec_info structs of all
the stmts in the basic block. */
2104 static void
2105 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2107 vec<slp_instance> slp_instances;
2108 slp_instance instance;
2109 basic_block bb;
2110 gimple_stmt_iterator si;
2111 unsigned i;
2113 if (!bb_vinfo)
2114 return;
2116 bb = BB_VINFO_BB (bb_vinfo);
2118 for (si = bb_vinfo->region_begin;
2119 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
2121 gimple *stmt = gsi_stmt (si);
2122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2124 if (stmt_info)
2125 /* Free stmt_vec_info. */
2126 free_stmt_vec_info (stmt);
2128 /* Reset region marker. */
2129 gimple_set_uid (stmt, -1);
2132 vect_destroy_datarefs (bb_vinfo);
2133 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2134 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2135 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2136 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2137 vect_free_slp_instance (instance);
2138 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2139 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2140 free (bb_vinfo);
2141 bb->aux = NULL;
2145 /* Analyze statements contained in SLP tree node after recursively analyzing
2146 the subtree. Return TRUE if the operations are supported. */
2148 static bool
2149 vect_slp_analyze_node_operations (slp_tree node)
2151 bool dummy;
2152 int i;
2153 gimple *stmt;
2154 slp_tree child;
2156 if (!node)
2157 return true;
2159 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2160 if (!vect_slp_analyze_node_operations (child))
2161 return false;
2163 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2165 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2166 gcc_assert (stmt_info);
2167 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2169 if (!vect_analyze_stmt (stmt, &dummy, node))
2170 return false;
2173 return true;
2177 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2178 operations are supported. */
2180 bool
2181 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2183 slp_instance instance;
2184 int i;
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_NOTE, vect_location,
2188 "=== vect_slp_analyze_operations ===\n");
2190 for (i = 0; slp_instances.iterate (i, &instance); )
2192 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2194 dump_printf_loc (MSG_NOTE, vect_location,
2195 "removing SLP instance operations starting from: ");
2196 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2197 SLP_TREE_SCALAR_STMTS
2198 (SLP_INSTANCE_TREE (instance))[0], 0);
2199 vect_free_slp_instance (instance);
2200 slp_instances.ordered_remove (i);
2202 else
2204 /* Compute the costs of the SLP instance. */
2205 vect_analyze_slp_cost (instance, data);
2206 i++;
2210 if (!slp_instances.length ())
2211 return false;
2213 return true;
/* Compute the scalar cost of the SLP node NODE and its children
and return it. Do not account for defs that are marked in LIFE,
and update LIFE according to the uses of NODE. */
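/* LIFE is indexed by the position of a stmt in the SLP group;
(*life)[i] == true means the i-th scalar stmt stays live after
vectorization, so neither it nor the defs it requires count as a
saving. E.g. (illustrative), for a group of two adds where only the
second result is also used by unvectorized code, LIFE ends up as
{ false, true }. */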
2221 static unsigned
2222 vect_bb_slp_scalar_cost (basic_block bb,
2223 slp_tree node, vec<bool, va_heap> *life)
2225 unsigned scalar_cost = 0;
2226 unsigned i;
2227 gimple *stmt;
2228 slp_tree child;
2230 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2232 unsigned stmt_cost;
2233 ssa_op_iter op_iter;
2234 def_operand_p def_p;
2235 stmt_vec_info stmt_info;
2237 if ((*life)[i])
2238 continue;
/* If there is a non-vectorized use of one of the defs then the scalar
stmt is kept live, in which case we do not account for it, or for
any required defs in the SLP children, in the scalar cost. This
makes the vectorization more costly when compared to
the scalar cost. */
2245 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2247 imm_use_iterator use_iter;
2248 gimple *use_stmt;
2249 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2250 if (!is_gimple_debug (use_stmt)
2251 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2252 use_stmt)
2253 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2255 (*life)[i] = true;
2256 BREAK_FROM_IMM_USE_STMT (use_iter);
2259 if ((*life)[i])
2260 continue;
2262 stmt_info = vinfo_for_stmt (stmt);
2263 if (STMT_VINFO_DATA_REF (stmt_info))
2265 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2266 stmt_cost = vect_get_stmt_cost (scalar_load);
2267 else
2268 stmt_cost = vect_get_stmt_cost (scalar_store);
2270 else
2271 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2273 scalar_cost += stmt_cost;
2276 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2277 if (child)
2278 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2280 return scalar_cost;
2283 /* Check if vectorization of the basic block is profitable. */
2285 static bool
2286 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2288 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2289 slp_instance instance;
2290 int i;
2291 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2292 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2294 /* Calculate scalar cost. */
2295 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2297 auto_vec<bool, 20> life;
2298 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2299 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2300 SLP_INSTANCE_TREE (instance),
2301 &life);
2304 /* Complete the target-specific cost calculation. */
2305 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2306 &vec_inside_cost, &vec_epilogue_cost);
2308 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2310 if (dump_enabled_p ())
2312 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2313 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2314 vec_inside_cost);
2315 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2316 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2317 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
/* Vectorization is profitable if its cost is less than the cost of the
scalar version. */
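/* Concretely (illustrative numbers): with vec_prologue_cost == 1,
vec_epilogue_cost == 0 and vec_inside_cost == 2, a block with
scalar_cost == 4 is deemed profitable (1 + 0 + 2 < 4), while one
with scalar_cost == 3 is not. */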
2322 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2323 return false;
2325 return true;
2328 /* Check if the basic block can be vectorized. */
2330 static bb_vec_info
2331 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2332 gimple_stmt_iterator region_end,
2333 vec<data_reference_p> datarefs, int n_stmts)
2335 bb_vec_info bb_vinfo;
2336 vec<slp_instance> slp_instances;
2337 slp_instance instance;
2338 int i;
2339 int min_vf = 2;
2341 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2343 if (dump_enabled_p ())
2344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2345 "not vectorized: too many instructions in "
2346 "basic block.\n");
2347 free_data_refs (datarefs);
2348 return NULL;
2351 bb_vinfo = new_bb_vec_info (region_begin, region_end);
2352 if (!bb_vinfo)
2353 return NULL;
2355 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2357 /* Analyze the data references. */
2359 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2363 "not vectorized: unhandled data-ref in basic "
2364 "block.\n");
2366 destroy_bb_vec_info (bb_vinfo);
2367 return NULL;
2370 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2372 if (dump_enabled_p ())
2373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2374 "not vectorized: not enough data-refs in "
2375 "basic block.\n");
2377 destroy_bb_vec_info (bb_vinfo);
2378 return NULL;
2381 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2383 if (dump_enabled_p ())
2384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2385 "not vectorized: unhandled data access in "
2386 "basic block.\n");
2388 destroy_bb_vec_info (bb_vinfo);
2389 return NULL;
2392 vect_pattern_recog (bb_vinfo);
2394 if (!vect_analyze_data_refs_alignment (bb_vinfo))
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2398 "not vectorized: bad data alignment in basic "
2399 "block.\n");
2401 destroy_bb_vec_info (bb_vinfo);
2402 return NULL;
2405 /* Check the SLP opportunities in the basic block, analyze and build SLP
2406 trees. */
2407 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2409 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2412 "Failed to SLP the basic block.\n");
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 "not vectorized: failed to find SLP opportunities "
2415 "in basic block.\n");
2418 destroy_bb_vec_info (bb_vinfo);
2419 return NULL;
2422 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2424 /* Mark all the statements that we want to vectorize as pure SLP and
2425 relevant. */
2426 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2428 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2429 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2432 /* Mark all the statements that we do not want to vectorize. */
2433 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2434 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2436 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2437 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2438 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2441 /* Analyze dependences. At this point all stmts not participating in
2442 vectorization have to be marked. Dependence analysis assumes
2443 that we either vectorize all SLP instances or none at all. */
2444 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "not vectorized: unhandled data dependence "
2449 "in basic block.\n");
2451 destroy_bb_vec_info (bb_vinfo);
2452 return NULL;
2455 if (!vect_verify_datarefs_alignment (bb_vinfo))
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "not vectorized: unsupported alignment in basic "
2460 "block.\n");
2461 destroy_bb_vec_info (bb_vinfo);
2462 return NULL;
2465 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2466 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2468 if (dump_enabled_p ())
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2470 "not vectorized: bad operation in basic block.\n");
2472 destroy_bb_vec_info (bb_vinfo);
2473 return NULL;
2476 /* Cost model: check if the vectorization is worthwhile. */
2477 if (!unlimited_cost_model (NULL)
2478 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2482 "not vectorized: vectorization is not "
2483 "profitable.\n");
2485 destroy_bb_vec_info (bb_vinfo);
2486 return NULL;
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_NOTE, vect_location,
2491 "Basic block will be vectorized using SLP\n");
2493 return bb_vinfo;
/* Main entry for the BB vectorizer. Analyze and transform BB; return
true if anything in the basic block was vectorized. */
2500 bool
2501 vect_slp_bb (basic_block bb)
2503 bb_vec_info bb_vinfo;
2504 gimple_stmt_iterator gsi;
2505 unsigned int vector_sizes;
2506 bool any_vectorized = false;
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2511 /* Autodetect first vector size we try. */
2512 current_vector_size = 0;
2513 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
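/* VECTOR_SIZES is a bitmask of the vector byte sizes the target can
autovectorize for. E.g. (assumed target) one supporting 16- and
32-byte vectors yields 16 | 32 == 0x30; the retry logic below always
picks the largest remaining size via 1 << floor_log2 (vector_sizes)
and clears each size once it has been tried. */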
2515 gsi = gsi_start_bb (bb);
2517 while (1)
2519 if (gsi_end_p (gsi))
2520 break;
2522 gimple_stmt_iterator region_begin = gsi;
2523 vec<data_reference_p> datarefs = vNULL;
2524 int insns = 0;
2526 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2528 gimple *stmt = gsi_stmt (gsi);
2529 if (is_gimple_debug (stmt))
2530 continue;
2531 insns++;
2533 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2534 vect_location = gimple_location (stmt);
2536 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2537 break;
2540 /* Skip leading unhandled stmts. */
2541 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2543 gsi_next (&gsi);
2544 continue;
2547 gimple_stmt_iterator region_end = gsi;
2549 bool vectorized = false;
2550 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2551 datarefs, insns);
2552 if (bb_vinfo
2553 && dbg_cnt (vect_slp))
2555 if (dump_enabled_p ())
2556 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2558 vect_schedule_slp (bb_vinfo);
2560 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_NOTE, vect_location,
2562 "basic block part vectorized\n");
2564 destroy_bb_vec_info (bb_vinfo);
2566 vectorized = true;
2568 else
2569 destroy_bb_vec_info (bb_vinfo);
2571 any_vectorized |= vectorized;
2573 vector_sizes &= ~current_vector_size;
2574 if (vectorized
2575 || vector_sizes == 0
2576 || current_vector_size == 0)
2578 if (gsi_end_p (region_end))
2579 break;
2581 /* Skip the unhandled stmt. */
2582 gsi_next (&gsi);
2584 /* And reset vector sizes. */
2585 current_vector_size = 0;
2586 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2588 else
2590 /* Try the next biggest vector size. */
2591 current_vector_size = 1 << floor_log2 (vector_sizes);
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_NOTE, vect_location,
2594 "***** Re-trying analysis with "
2595 "vector size %d\n", current_vector_size);
2597 /* Start over. */
2598 gsi = region_begin;
2602 return any_vectorized;
/* For constant and loop-invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
OP_NUM determines whether we gather defs for operand 0 or operand 1 of
the RHS of the scalar stmts. NUMBER_OF_VECTORS is the number of vector
defs to create. REDUC_INDEX is the index of the reduction operand in
the statements, or -1 if there is none. */
2613 static void
2614 vect_get_constant_vectors (tree op, slp_tree slp_node,
2615 vec<tree> *vec_oprnds,
2616 unsigned int op_num, unsigned int number_of_vectors,
2617 int reduc_index)
2619 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2620 gimple *stmt = stmts[0];
2621 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2622 unsigned nunits;
2623 tree vec_cst;
2624 tree *elts;
2625 unsigned j, number_of_places_left_in_vector;
2626 tree vector_type;
2627 tree vop;
2628 int group_size = stmts.length ();
2629 unsigned int vec_num, i;
2630 unsigned number_of_copies = 1;
2631 vec<tree> voprnds;
2632 voprnds.create (number_of_vectors);
2633 bool constant_p, is_store;
2634 tree neutral_op = NULL;
2635 enum tree_code code = gimple_expr_code (stmt);
2636 gimple *def_stmt;
2637 struct loop *loop;
2638 gimple_seq ctor_seq = NULL;
2640 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2641 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2643 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2644 && reduc_index != -1)
2646 op_num = reduc_index;
2647 op = gimple_op (stmt, op_num + 1);
2648 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2649 we need either neutral operands or the original operands. See
2650 get_initial_def_for_reduction() for details. */
2651 switch (code)
2653 case WIDEN_SUM_EXPR:
2654 case DOT_PROD_EXPR:
2655 case SAD_EXPR:
2656 case PLUS_EXPR:
2657 case MINUS_EXPR:
2658 case BIT_IOR_EXPR:
2659 case BIT_XOR_EXPR:
2660 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2661 neutral_op = build_real (TREE_TYPE (op), dconst0);
2662 else
2663 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2665 break;
2667 case MULT_EXPR:
2668 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2669 neutral_op = build_real (TREE_TYPE (op), dconst1);
2670 else
2671 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2673 break;
2675 case BIT_AND_EXPR:
2676 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2677 break;
/* For MIN/MAX we don't have an easy neutral operand, but
the initial values can be used here as-is. Only for
a reduction chain do we have to force a neutral element. */
2682 case MAX_EXPR:
2683 case MIN_EXPR:
2684 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2685 neutral_op = NULL;
2686 else
2688 def_stmt = SSA_NAME_DEF_STMT (op);
2689 loop = (gimple_bb (stmt))->loop_father;
2690 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2691 loop_preheader_edge (loop));
2693 break;
2695 default:
2696 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2697 neutral_op = NULL;
2701 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2703 is_store = true;
2704 op = gimple_assign_rhs1 (stmt);
2706 else
2707 is_store = false;
2709 gcc_assert (op);
2711 if (CONSTANT_CLASS_P (op))
2712 constant_p = true;
2713 else
2714 constant_p = false;
2716 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2717 created vectors. It is greater than 1 if unrolling is performed.
2719 For example, we have two scalar operands, s1 and s2 (e.g., group of
2720 strided accesses of size two), while NUNITS is four (i.e., four scalars
2721 of this type can be packed in a vector). The output vector will contain
2722 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2723 will be 2).
2725 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2726 containing the operands.
2728 For example, NUNITS is four as before, and the group size is 8
2729 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2730 {s5, s6, s7, s8}. */
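/* In other words (same examples as above):
NUMBER_OF_COPIES == NUNITS * NUMBER_OF_VECTORS / GROUP_SIZE,
i.e. 4 * 1 / 2 == 2 copies for {s1, s2, s1, s2} and
4 * 2 / 8 == 1 copy for the two vectors of the eight-element group. */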
2732 number_of_copies = nunits * number_of_vectors / group_size;
2734 number_of_places_left_in_vector = nunits;
2735 elts = XALLOCAVEC (tree, nunits);
2736 bool place_after_defs = false;
2737 for (j = 0; j < number_of_copies; j++)
2739 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2741 if (is_store)
2742 op = gimple_assign_rhs1 (stmt);
2743 else
2745 switch (code)
2747 case COND_EXPR:
2748 if (op_num == 0 || op_num == 1)
2750 tree cond = gimple_assign_rhs1 (stmt);
2751 op = TREE_OPERAND (cond, op_num);
2753 else
2755 if (op_num == 2)
2756 op = gimple_assign_rhs2 (stmt);
2757 else
2758 op = gimple_assign_rhs3 (stmt);
2760 break;
2762 case CALL_EXPR:
2763 op = gimple_call_arg (stmt, op_num);
2764 break;
2766 case LSHIFT_EXPR:
2767 case RSHIFT_EXPR:
2768 case LROTATE_EXPR:
2769 case RROTATE_EXPR:
2770 op = gimple_op (stmt, op_num + 1);
/* Unlike the other binary operators, the shift count of
shifts/rotates is an int rather than the same type as
the LHS, so make sure the scalar is the right type if
we are dealing with vectors of
long long/long/short/char. */
2776 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2777 op = fold_convert (TREE_TYPE (vector_type), op);
2778 break;
2780 default:
2781 op = gimple_op (stmt, op_num + 1);
2782 break;
2786 if (reduc_index != -1)
2788 loop = (gimple_bb (stmt))->loop_father;
2789 def_stmt = SSA_NAME_DEF_STMT (op);
2791 gcc_assert (loop);
/* Get the def before the loop. In a reduction chain we have only
one initial value. */
2795 if ((j != (number_of_copies - 1)
2796 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2797 && i != 0))
2798 && neutral_op)
2799 op = neutral_op;
2800 else
2801 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2802 loop_preheader_edge (loop));
2805 /* Create 'vect_ = {op0,op1,...,opn}'. */
2806 number_of_places_left_in_vector--;
2807 tree orig_op = op;
2808 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2810 if (CONSTANT_CLASS_P (op))
2812 op = fold_unary (VIEW_CONVERT_EXPR,
2813 TREE_TYPE (vector_type), op);
2814 gcc_assert (op && CONSTANT_CLASS_P (op));
2816 else
2818 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2819 gimple *init_stmt;
2820 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2821 init_stmt
2822 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2823 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2824 op = new_temp;
2827 elts[number_of_places_left_in_vector] = op;
2828 if (!CONSTANT_CLASS_P (op))
2829 constant_p = false;
2830 if (TREE_CODE (orig_op) == SSA_NAME
2831 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2832 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2833 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2834 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2835 place_after_defs = true;
2837 if (number_of_places_left_in_vector == 0)
2839 number_of_places_left_in_vector = nunits;
2841 if (constant_p)
2842 vec_cst = build_vector (vector_type, elts);
2843 else
2845 vec<constructor_elt, va_gc> *v;
2846 unsigned k;
2847 vec_alloc (v, nunits);
2848 for (k = 0; k < nunits; ++k)
2849 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2850 vec_cst = build_constructor (vector_type, v);
2852 tree init;
2853 gimple_stmt_iterator gsi;
2854 if (place_after_defs)
2856 gsi = gsi_for_stmt
2857 (vect_find_last_scalar_stmt_in_slp (slp_node));
2858 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2860 else
2861 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2862 if (ctor_seq != NULL)
2864 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2865 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2866 GSI_SAME_STMT);
2867 ctor_seq = NULL;
2869 voprnds.quick_push (init);
2870 place_after_defs = false;
/* Since the vectors were created in reverse order, we have to reverse
them here. */
2877 vec_num = voprnds.length ();
2878 for (j = vec_num; j != 0; j--)
2880 vop = voprnds[j - 1];
2881 vec_oprnds->quick_push (vop);
2884 voprnds.release ();
/* If VF is greater than the unrolling factor needed for the SLP
group of stmts, the NUMBER_OF_VECTORS to be created is greater than
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, so we have
to replicate the vectors. */
2890 while (number_of_vectors > vec_oprnds->length ())
2892 tree neutral_vec = NULL;
2894 if (neutral_op)
2896 if (!neutral_vec)
2897 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2899 vec_oprnds->quick_push (neutral_vec);
2901 else
2903 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2904 vec_oprnds->quick_push (vop);
/* Get vectorized definitions from SLP_NODE, which contains the
corresponding vectorized def-stmts. */
2913 static void
2914 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2916 tree vec_oprnd;
2917 gimple *vec_def_stmt;
2918 unsigned int i;
2920 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2922 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2924 gcc_assert (vec_def_stmt);
2925 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2926 vec_oprnds->quick_push (vec_oprnd);
/* Get vectorized definitions for SLP_NODE.
If the scalar definitions are loop invariants or constants, collect them and
call vect_get_constant_vectors() to create vector stmts.
Otherwise, the def-stmts must already be vectorized and the vectorized stmts
must be stored in the corresponding child of SLP_NODE, and we call
vect_get_slp_vect_defs () to retrieve them. */
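/* Illustrative example (assumed stmts): for the operand vector
(x_1, 3) of an SLP '+' node, x_1 matches the LHS of the first stmt
of child 0, so its vector defs are fetched from that child, while
the constant 3 matches no child, so vector constants are built for
it via vect_get_constant_vectors (). */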
2938 void
2939 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2940 vec<vec<tree> > *vec_oprnds, int reduc_index)
2942 gimple *first_stmt;
2943 int number_of_vects = 0, i;
2944 unsigned int child_index = 0;
2945 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2946 slp_tree child = NULL;
2947 vec<tree> vec_defs;
2948 tree oprnd;
2949 bool vectorized_defs;
2951 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2952 FOR_EACH_VEC_ELT (ops, i, oprnd)
/* For each operand we check if it has vectorized definitions in a child
node or whether we need to create them (for invariants and constants). We
check if the LHS of the first stmt of the next child matches OPRND.
If it does, we found the correct child. Otherwise, we call
vect_get_constant_vectors () and do not advance CHILD_INDEX, in order
to check this child node for the next operand. */
2960 vectorized_defs = false;
2961 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2963 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2965 /* We have to check both pattern and original def, if available. */
2966 if (child)
2968 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2969 gimple *related
2970 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2972 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2973 || (related
2974 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2976 /* The number of vector defs is determined by the number of
2977 vector statements in the node from which we get those
2978 statements. */
2979 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2980 vectorized_defs = true;
2981 child_index++;
2984 else
2985 child_index++;
2988 if (!vectorized_defs)
2990 if (i == 0)
2992 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* The number of vector stmts was calculated according to the LHS in
vect_schedule_slp_instance (); fix it by replacing the LHS with
the RHS, if necessary. See vect_get_smallest_scalar_type () for
details. */
2997 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2998 &rhs_size_unit);
2999 if (rhs_size_unit != lhs_size_unit)
3001 number_of_vects *= rhs_size_unit;
3002 number_of_vects /= lhs_size_unit;
3007 /* Allocate memory for vectorized defs. */
3008 vec_defs = vNULL;
3009 vec_defs.create (number_of_vects);
/* For reduction defs we call vect_get_constant_vectors (), since we are
looking for the initial loop-invariant values. */
3013 if (vectorized_defs && reduc_index == -1)
3014 /* The defs are already vectorized. */
3015 vect_get_slp_vect_defs (child, &vec_defs);
3016 else
3017 /* Build vectors from scalar defs. */
3018 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3019 number_of_vects, reduc_index);
3021 vec_oprnds->quick_push (vec_defs);
3023 /* For reductions, we only need initial values. */
3024 if (reduc_index != -1)
3025 return;
/* Create NCOPIES permutation statements using the mask MASK (by
building a vector of type MASK_TYPE from it) and two input vectors placed in
DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy,
shifting by STRIDE elements of DR_CHAIN for every copy.
(STRIDE is the number of vectorized stmts for NODE divided by the number of
copies.)
VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
where the created stmts must be inserted. */
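/* E.g. (illustrative): with NCOPIES == 2 and four vector stmts in
NODE, STRIDE == 2; copy 0 permutes DR_CHAIN[FIRST_VEC_INDX] and
DR_CHAIN[SECOND_VEC_INDX] into slot VECT_STMTS_COUNTER, and copy 1
permutes the vectors two positions further down DR_CHAIN into slot
STRIDE + VECT_STMTS_COUNTER. */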
3039 static inline void
3040 vect_create_mask_and_perm (gimple *stmt,
3041 tree mask, int first_vec_indx, int second_vec_indx,
3042 gimple_stmt_iterator *gsi, slp_tree node,
3043 tree vectype, vec<tree> dr_chain,
3044 int ncopies, int vect_stmts_counter)
3046 tree perm_dest;
3047 gimple *perm_stmt = NULL;
3048 int i, stride;
3049 tree first_vec, second_vec, data_ref;
3051 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3053 /* Initialize the vect stmts of NODE to properly insert the generated
3054 stmts later. */
3055 for (i = SLP_TREE_VEC_STMTS (node).length ();
3056 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3057 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3059 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3060 for (i = 0; i < ncopies; i++)
3062 first_vec = dr_chain[first_vec_indx];
3063 second_vec = dr_chain[second_vec_indx];
3065 /* Generate the permute statement. */
3066 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3067 first_vec, second_vec, mask);
3068 data_ref = make_ssa_name (perm_dest, perm_stmt);
3069 gimple_set_lhs (perm_stmt, data_ref);
3070 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3072 /* Store the vector statement in NODE. */
3073 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
3075 first_vec_indx += stride;
3076 second_vec_indx += stride;
/* Given FIRST_MASK_ELEMENT, the mask element in element representation,
return in CURRENT_MASK_ELEMENT its equivalent in target-specific
representation. Check that the mask is valid and return FALSE if not.
Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
the next vector, i.e., the current first vector is not needed. */
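/* Worked example (illustrative): with MASK_NUNITS == 4 and
FIRST_MASK_ELEMENT + M == 6, and no earlier mask fix
(*NUMBER_OF_MASK_FIXES == 1), CURRENT_MASK_ELEMENT stays 6,
i.e. element 2 of the second input vector; values >= 8 would force
dropping the first vector and rebasing the mask. */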
3087 static bool
3088 vect_get_mask_element (gimple *stmt, int first_mask_element, int m,
3089 int mask_nunits, bool only_one_vec, int index,
3090 unsigned char *mask, int *current_mask_element,
3091 bool *need_next_vector, int *number_of_mask_fixes,
3092 bool *mask_fixed, bool *needs_first_vector)
3094 int i;
3096 /* Convert to target specific representation. */
3097 *current_mask_element = first_mask_element + m;
/* Adjust the value in case it's a mask for the second and third vectors. */
3099 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3101 if (*current_mask_element < 0)
3103 if (dump_enabled_p ())
3105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106 "permutation requires past vector ");
3107 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3108 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3110 return false;
3113 if (*current_mask_element < mask_nunits)
3114 *needs_first_vector = true;
3116 /* We have only one input vector to permute but the mask accesses values in
3117 the next vector as well. */
3118 if (only_one_vec && *current_mask_element >= mask_nunits)
3120 if (dump_enabled_p ())
3122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3123 "permutation requires at least two vectors ");
3124 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3125 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3128 return false;
3131 /* The mask requires the next vector. */
3132 while (*current_mask_element >= mask_nunits * 2)
3134 if (*needs_first_vector || *mask_fixed)
3136 /* We either need the first vector too or have already moved to the
3137 next vector. In both cases, this permutation needs three
3138 vectors. */
3139 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3142 "permutation requires at "
3143 "least three vectors ");
3144 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3145 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3148 return false;
/* We move to the next vector, dropping the first one and working with
the second and the third, so we need to adjust the values of the mask
accordingly. */
3154 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3156 for (i = 0; i < index; i++)
3157 mask[i] -= mask_nunits * *number_of_mask_fixes;
3159 (*number_of_mask_fixes)++;
3160 *mask_fixed = true;
3163 *need_next_vector = *mask_fixed;
3165 /* This was the last element of this mask. Start a new one. */
3166 if (index == mask_nunits - 1)
3168 *number_of_mask_fixes = 1;
3169 *mask_fixed = false;
3170 *needs_first_vector = false;
3173 return true;
3177 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3178 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3179 permute statements for the SLP node NODE of the SLP instance
3180 SLP_NODE_INSTANCE. */
3182 bool
3183 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3184 gimple_stmt_iterator *gsi, int vf,
3185 slp_instance slp_node_instance, bool analyze_only)
3187 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3189 tree mask_element_type = NULL_TREE, mask_type;
3190 int i, j, k, nunits, vec_index = 0;
3191 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3192 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3193 int first_mask_element;
3194 int index, unroll_factor, current_mask_element, ncopies;
3195 unsigned char *mask;
3196 bool only_one_vec = false, need_next_vector = false;
3197 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3198 int number_of_mask_fixes = 1;
3199 bool mask_fixed = false;
3200 bool needs_first_vector = false;
3201 machine_mode mode;
3203 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3204 return false;
3206 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3208 mode = TYPE_MODE (vectype);
3210 if (!can_vec_perm_p (mode, false, NULL))
3212 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3215 "no vect permute for ");
3216 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3217 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3219 return false;
3222 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3223 same size as the vector element being permuted. */
3224 mask_element_type = lang_hooks.types.type_for_mode
3225 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3226 mask_type = get_vectype_for_scalar_type (mask_element_type);
3227 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3228 mask = XALLOCAVEC (unsigned char, nunits);
3229 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
/* The number of vector stmts to generate, based only on the
SLP_NODE_INSTANCE unrolling factor. */
3233 orig_vec_stmts_num
3234 = (STMT_VINFO_GROUP_SIZE (stmt_info)
3235 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
3236 + nunits - 1) / nunits;
3237 if (orig_vec_stmts_num == 1)
3238 only_one_vec = true;
/* The number of copies is determined by the final vectorization factor
relative to the SLP_NODE_INSTANCE unrolling factor. */
3242 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3244 /* Generate permutation masks for every NODE. Number of masks for each NODE
3245 is equal to GROUP_SIZE.
3246 E.g., we have a group of three nodes with three loads from the same
3247 location in each node, and the vector size is 4. I.e., we have a
3248 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3249 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3250 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3253 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
The last mask is illegal since we assume only two operands for the
permute operation, and the mask element values can't be outside that
range. Hence, the last mask must be converted into {2,5,5,5}.
3257 For the first two permutations we need the first and the second input
3258 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3259 we need the second and the third vectors: {b1,c1,a2,b2} and
3260 {c2,a3,b3,c3}. */
3263 index = 0;
3264 vect_stmts_counter = 0;
3265 vec_index = 0;
3266 first_vec_index = vec_index++;
3267 if (only_one_vec)
3268 second_vec_index = first_vec_index;
3269 else
3270 second_vec_index = vec_index++;
3272 for (j = 0; j < unroll_factor; j++)
3274 for (k = 0; k < group_size; k++)
3276 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3277 first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
3278 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3279 nunits, only_one_vec, index,
3280 mask, &current_mask_element,
3281 &need_next_vector,
3282 &number_of_mask_fixes, &mask_fixed,
3283 &needs_first_vector))
3284 return false;
3285 gcc_assert (current_mask_element >= 0
3286 && current_mask_element < 2 * nunits);
3287 mask[index++] = current_mask_element;
3289 if (index == nunits)
3291 index = 0;
3292 if (!can_vec_perm_p (mode, false, mask))
3294 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3297 vect_location,
3298 "unsupported vect permute { ");
3299 for (i = 0; i < nunits; ++i)
3300 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3301 mask[i]);
3302 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3304 return false;
3307 if (!analyze_only)
3309 int l;
3310 tree mask_vec, *mask_elts;
3311 mask_elts = XALLOCAVEC (tree, nunits);
3312 for (l = 0; l < nunits; ++l)
3313 mask_elts[l] = build_int_cst (mask_element_type,
3314 mask[l]);
3315 mask_vec = build_vector (mask_type, mask_elts);
3317 if (need_next_vector)
3319 first_vec_index = second_vec_index;
3320 second_vec_index = vec_index;
3323 vect_create_mask_and_perm (stmt,
3324 mask_vec, first_vec_index, second_vec_index,
3325 gsi, node, vectype, dr_chain,
3326 ncopies, vect_stmts_counter++);
3333 return true;
3338 /* Vectorize SLP instance tree in postorder. */
3340 static bool
3341 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3342 unsigned int vectorization_factor)
3344 gimple *stmt;
3345 bool grouped_store, is_store;
3346 gimple_stmt_iterator si;
3347 stmt_vec_info stmt_info;
3348 unsigned int vec_stmts_size, nunits, group_size;
3349 tree vectype;
3350 int i;
3351 slp_tree child;
3353 if (!node)
3354 return false;
3356 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3357 vect_schedule_slp_instance (child, instance, vectorization_factor);
3359 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3360 stmt_info = vinfo_for_stmt (stmt);
3362 /* VECTYPE is the type of the destination. */
3363 vectype = STMT_VINFO_VECTYPE (stmt_info);
3364 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3365 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
/* For each SLP instance calculate the number of vector stmts to be created
for the scalar stmts in each node of the SLP tree. The number of vector
elements in one vector iteration is the number of scalar elements in
one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
size, unless this is an SLP reduction, in which case the number of vector
stmts equals the number of vector stmts of the children. */
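/* E.g. (illustrative numbers): GROUP_SIZE == 2, VF == 4 and
NUNITS == 4 give vec_stmts_size == 4 * 2 / 4 == 2 vector stmts for
the node. */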
3374 if (GROUP_FIRST_ELEMENT (stmt_info)
3375 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3376 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3377 else
3378 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3380 if (!SLP_TREE_VEC_STMTS (node).exists ())
3382 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3383 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3386 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_NOTE,vect_location,
3389 "------>vectorizing SLP node starting from: ");
3390 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3391 dump_printf (MSG_NOTE, "\n");
/* Vectorized stmts go before the last scalar stmt, which is where
all uses are ready. */
3396 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
/* Mark the first element of the reduction chain as a reduction to properly
transform the node. In the analysis phase only the last element of the
chain is marked as a reduction. */
3401 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3402 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3404 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3405 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3408 /* Handle two-operation SLP nodes by vectorizing the group with
3409 both operations and then performing a merge. */
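/* E.g. (illustrative) an addsub-style group

  a[2*i]   = b[2*i]   + c[2*i];
  a[2*i+1] = b[2*i+1] - c[2*i+1];

is vectorized once with all lanes doing PLUS and once with all
lanes doing MINUS; a VEC_PERM_EXPR then blends the two results,
taking even lanes from the PLUS vector and odd lanes from the
MINUS vector. */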
3410 if (SLP_TREE_TWO_OPERATORS (node))
3412 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3413 enum tree_code ocode;
3414 gimple *ostmt;
3415 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3416 bool allsame = true;
3417 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3418 if (gimple_assign_rhs_code (ostmt) != code0)
3420 mask[i] = 1;
3421 allsame = false;
3422 ocode = gimple_assign_rhs_code (ostmt);
3424 else
3425 mask[i] = 0;
3426 if (!allsame)
3428 vec<gimple *> v0;
3429 vec<gimple *> v1;
3430 unsigned j;
3431 tree tmask = NULL_TREE;
3432 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3433 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3434 SLP_TREE_VEC_STMTS (node).truncate (0);
3435 gimple_assign_set_rhs_code (stmt, ocode);
3436 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3437 gimple_assign_set_rhs_code (stmt, code0);
3438 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3439 SLP_TREE_VEC_STMTS (node).truncate (0);
3440 tree meltype = build_nonstandard_integer_type
3441 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3442 tree mvectype = get_same_sized_vectype (meltype, vectype);
3443 unsigned k = 0, l;
3444 for (j = 0; j < v0.length (); ++j)
3446 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3447 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3449 if (k >= group_size)
3450 k = 0;
3451 melts[l] = build_int_cst
3452 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3454 tmask = build_vector (mvectype, melts);
/* ??? Not all targets support a VEC_PERM_EXPR with a
constant mask that would translate to a vec_merge RTX
(with their vec_perm_const_ok). We can either not
vectorize in that case or let veclower do its job.
Unfortunately that isn't too great, and at least for
plus/minus we'd eventually like to match targets'
vector addsub instructions. */
3463 gimple *vstmt;
3464 vstmt = gimple_build_assign (make_ssa_name (vectype),
3465 VEC_PERM_EXPR,
3466 gimple_assign_lhs (v0[j]),
3467 gimple_assign_lhs (v1[j]), tmask);
3468 vect_finish_stmt_generation (stmt, vstmt, &si);
3469 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3471 v0.release ();
3472 v1.release ();
3473 return false;
3476 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3477 return is_store;
/* Replace scalar calls from SLP node NODE by setting their LHS to zero.
For loop vectorization this is done in vectorizable_call, but for SLP
it needs to be deferred until the end of vect_schedule_slp, because multiple
SLP instances may refer to the same scalar stmt. */
3485 static void
3486 vect_remove_slp_scalar_calls (slp_tree node)
3488 gimple *stmt, *new_stmt;
3489 gimple_stmt_iterator gsi;
3490 int i;
3491 slp_tree child;
3492 tree lhs;
3493 stmt_vec_info stmt_info;
3495 if (!node)
3496 return;
3498 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3499 vect_remove_slp_scalar_calls (child);
3501 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3503 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3504 continue;
3505 stmt_info = vinfo_for_stmt (stmt);
3506 if (stmt_info == NULL
3507 || is_pattern_stmt_p (stmt_info)
3508 || !PURE_SLP_STMT (stmt_info))
3509 continue;
3510 lhs = gimple_call_lhs (stmt);
3511 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3512 set_vinfo_for_stmt (new_stmt, stmt_info);
3513 set_vinfo_for_stmt (stmt, NULL);
3514 STMT_VINFO_STMT (stmt_info) = new_stmt;
3515 gsi = gsi_for_stmt (stmt);
3516 gsi_replace (&gsi, new_stmt, false);
3517 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3521 /* Generate vector code for all SLP instances in the loop/basic block. */
3523 bool
3524 vect_schedule_slp (vec_info *vinfo)
3526 vec<slp_instance> slp_instances;
3527 slp_instance instance;
3528 unsigned int i, vf;
3529 bool is_store = false;
3531 slp_instances = vinfo->slp_instances;
3532 if (is_a <loop_vec_info> (vinfo))
3533 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3534 else
3535 vf = 1;
3537 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3539 /* Schedule the tree of INSTANCE. */
3540 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3541 instance, vf);
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_NOTE, vect_location,
3544 "vectorizing stmts using SLP.\n");
3547 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3549 slp_tree root = SLP_INSTANCE_TREE (instance);
3550 gimple *store;
3551 unsigned int j;
3552 gimple_stmt_iterator gsi;
/* Remove scalar call stmts. Do not do this for basic-block
vectorization as not all uses may be vectorized.
??? Why should this be necessary? DCE should be able to
remove the stmts itself.
??? For BB vectorization we could also remove scalar
stmts starting from the SLP tree root if they have no
uses. */
3561 if (is_a <loop_vec_info> (vinfo))
3562 vect_remove_slp_scalar_calls (root);
3564 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3565 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3567 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3568 break;
3570 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3571 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3572 /* Free the attached stmt_vec_info and remove the stmt. */
3573 gsi = gsi_for_stmt (store);
3574 unlink_stmt_vdef (store);
3575 gsi_remove (&gsi, true);
3576 release_defs (store);
3577 free_stmt_vec_info (store);
3581 return is_store;