/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;
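  /* The walk below sums GROUP_GAP over the chain elements that follow
     FIRST_STMT; for consecutive accesses each gap is 1, so the running
     RESULT is STMT's scalar index within the chain.  */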
  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];
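          /* For example, with *SWAP == 1 the map {1, 0, 2, 3} visits the
             two comparison operands in swapped order, while with *SWAP == 2
             the map {0, 1, 3, 2} instead visits the two arms of the
             COND_EXPR in swapped order.  */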
          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, unsigned int *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

  return true;
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;
  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;
  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
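      /* Lane I of the mask selects from the first-operation result
         (input vector 0) or from the alternate-operation result (input
         vector 1).  E.g. a { PLUS_EXPR, MINUS_EXPR } group with V4SI
         vectors yields { 0, 5, 2, 7 }: even lanes from the PLUS vector,
         odd lanes from the MINUS vector.  */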
      vec_perm_builder sel (count, count, 1);
      for (i = 0; i < count; ++i)
        {
          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            elt += count;
          sel.quick_push (elt);
        }
      vec_perm_indices indices (sel, 2, count);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
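/* Scalar stmt sets for which building an SLP tree has already failed;
   vect_build_slp_tree checks it to avoid re-running the exponential
   retry logic on the same stmts (see the PR81723 note below).  */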
static scalar_stmts_set_t *bst_fail;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree starting from NODE.  Fail (and return
   NULL) if def-stmts are not isomorphic, require data permutation or
   are of unsupported types of operation.  Otherwise, return the built
   SLP tree; MATCHES indicates which stmts mismatched the first one.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;
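  /* What is left of MAX_TREE_SIZE is the node budget for this subtree;
     building a child below charges one node against it up front.  */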
  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }
      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }
      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child),
                                        j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);
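  /* PERMUTATION maps each stmt's current position to its new one:
     the stmt at index I ends up in slot PERMUTATION[I].  */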
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);
  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
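              /* E.g. with GROUP_SIZE 6 and NUNITS 4 the bound below is 4:
                 lanes 4 and 5 would sit in a second vector whose load also
                 touches the non-existent group elements 6 and 7.  */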
              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                               (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last scalar stmt in the SLP tree node NODE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost,
                         scalar_stmts_set_t* visited)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* If we already costed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
    return;

  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost, visited);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)
           ? VMAT_STRIDED_SLP
           : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            {
              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              unsigned n_perms;
              vect_transform_slp_perm_load (node, vNULL, NULL,
                                            ncopies_for_cost, instance, true,
                                            &n_perms);
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
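              /* Count one vector load for each NUNITS-wide window of the
                 group that has at least one lane referenced by the load
                 permutation.  */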
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
              bitmap_clear (perm);
              for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
                bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
              ncopies_for_cost = 0;
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
                {
                  if (i % nunits == 0)
                    {
                      if (load_seen)
                        ncopies_for_cost++;
                      load_seen = false;
                    }
                  if (bitmap_bit_p (perm, i))
                    load_seen = true;
                }
              if (load_seen)
                ncopies_for_cost++;
              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                              + nunits - 1) / nunits);
              poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
              ncopies_for_cost *= estimated_poly_value (uf);
            }
          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,
                                body_cost_vec);
          return;
        }
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
                             (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      /* ??? No easy way to get at the actual number of vector stmts
         to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
1860 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1861 slp_tree node = SLP_INSTANCE_TREE (instance);
1862 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1863 /* Get the estimated vectorization factor, which is always one for
1864 basic-block vectorization. */
1865 unsigned int assumed_vf;
1866 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1867 assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
1868 else
1869 assumed_vf = 1;
1870 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1871 /* For reductions look at a reduction operand in case the reduction
1872 operation is widening like DOT_PROD or SAD. */
1873 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1875 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1876 switch (gimple_assign_rhs_code (stmt))
1878 case DOT_PROD_EXPR:
1879 case SAD_EXPR:
1880 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1881 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1882 break;
1883 default:;
1886 ncopies_for_cost = least_common_multiple (nunits,
1887 group_size * assumed_vf) / nunits;
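/* A worked example with illustrative values: NUNITS == 4, GROUP_SIZE == 3
   and ASSUMED_VF == 2 give least_common_multiple (4, 6) / 4 == 12 / 4 == 3,
   i.e. each SLP node is costed as three vector stmt copies.  */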
1889 prologue_cost_vec.create (10);
1890 body_cost_vec.create (10);
1891 scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
1892 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1893 &prologue_cost_vec, &body_cost_vec,
1894 ncopies_for_cost, visited);
1895 delete visited;
1897 /* Record the prologue costs, which were delayed until we were
1898 sure that SLP was successful. */
1899 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1901 struct _stmt_vec_info *stmt_info
1902 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1903 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1904 si->misalign, vect_prologue);
1907 /* Record the instance's instructions in the target cost model. */
1908 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1910 struct _stmt_vec_info *stmt_info
1911 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1912 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1913 si->misalign, vect_body);
1916 prologue_cost_vec.release ();
1917 body_cost_vec.release ();
1920 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1921 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1922 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1923 containing the remainder.
1924 Return the first stmt in the second group. */
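/* Illustrative example (hypothetical sizes): splitting a group of 7
   consecutive stores with GROUP1_SIZE == 4 leaves stmts 0-3 in the first
   group and makes stmt 4 the head of a second group of size 3.  The first
   group's GROUP_GAP grows by 3 (the second group's size) so that iterating
   over it also steps over the second group's elements, while the second
   group's GROUP_GAP becomes the original gap plus 4.  */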
1926 static gimple *
1927 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1929 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1930 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1931 gcc_assert (group1_size > 0);
1932 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1933 gcc_assert (group2_size > 0);
1934 GROUP_SIZE (first_vinfo) = group1_size;
1936 gimple *stmt = first_stmt;
1937 for (unsigned i = group1_size; i > 1; i--)
1939 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1940 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1942 /* STMT is now the last element of the first group. */
1943 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1944 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1946 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1947 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1949 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1950 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1953 /* For the second group, the GROUP_GAP is the gap that was before the
1954 original group plus the size of the first group, which it now has to skip. */
1955 GROUP_GAP (vinfo_for_stmt (group2)) =
1956 GROUP_GAP (first_vinfo) + group1_size;
1958 /* GROUP_GAP of the first group now has to skip over the second group too. */
1959 GROUP_GAP (first_vinfo) += group2_size;
1961 if (dump_enabled_p ())
1962 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1963 group1_size, group2_size);
1965 return group2;
1968 /* Analyze an SLP instance starting from a group of grouped stores. Call
1969 vect_build_slp_tree to build a tree of packed stmts if possible.
1970 Return FALSE if it's impossible to SLP any stmt in the loop. */
1972 static bool
1973 vect_analyze_slp_instance (vec_info *vinfo,
1974 gimple *stmt, unsigned max_tree_size)
1976 slp_instance new_instance;
1977 slp_tree node;
1978 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1979 unsigned int nunits;
1980 tree vectype, scalar_type = NULL_TREE;
1981 gimple *next;
1982 unsigned int i;
1983 unsigned int max_nunits = 0;
1984 vec<slp_tree> loads;
1985 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1986 vec<gimple *> scalar_stmts;
1988 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1990 if (dr)
1992 scalar_type = TREE_TYPE (DR_REF (dr));
1993 vectype = get_vectype_for_scalar_type (scalar_type);
1995 else
1997 gcc_assert (is_a <loop_vec_info> (vinfo));
1998 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2001 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
2003 else
2005 gcc_assert (is_a <loop_vec_info> (vinfo));
2006 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2007 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2010 if (!vectype)
2012 if (dump_enabled_p ())
2014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2015 "Build SLP failed: unsupported data-type ");
2016 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
2017 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2020 return false;
2022 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2024 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2025 scalar_stmts.create (group_size);
2026 next = stmt;
2027 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2029 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2030 while (next)
2032 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2033 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2034 scalar_stmts.safe_push (
2035 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2036 else
2037 scalar_stmts.safe_push (next);
2038 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2040 /* Mark the first element of the reduction chain as reduction to properly
2041 transform the node. In the reduction analysis phase only the last
2042 element of the chain is marked as reduction. */
2043 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2044 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2046 else
2048 /* Collect reduction statements. */
2049 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2050 for (i = 0; reductions.iterate (i, &next); i++)
2051 scalar_stmts.safe_push (next);
2054 loads.create (group_size);
2056 /* Build the tree for the SLP instance. */
2057 bool *matches = XALLOCAVEC (bool, group_size);
2058 unsigned npermutes = 0;
2059 bst_fail = new scalar_stmts_set_t ();
2060 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2061 &max_nunits, &loads, matches, &npermutes,
2062 NULL, max_tree_size);
2063 delete bst_fail;
2064 if (node != NULL)
2066 /* Calculate the unrolling factor based on the smallest type. */
2067 poly_uint64 unrolling_factor
2068 = least_common_multiple (max_nunits, group_size) / group_size;
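/* E.g. (made-up numbers): max_nunits == 8 and group_size == 6 give
   least_common_multiple (8, 6) / 6 == 24 / 6 == 4, i.e. four copies of
   the group are needed to fill whole 8-element vectors.  */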
2070 if (maybe_ne (unrolling_factor, 1U)
2071 && is_a <bb_vec_info> (vinfo))
2074 if (max_nunits > group_size)
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "Build SLP failed: store group "
2078 "size not a multiple of the vector size "
2079 "in basic block SLP\n");
2080 vect_free_slp_tree (node);
2081 loads.release ();
2082 return false;
2084 /* Fatal mismatch. */
2085 matches[group_size/max_nunits * max_nunits] = false;
2086 vect_free_slp_tree (node);
2087 loads.release ();
2089 else
2091 /* Create a new SLP instance. */
2092 new_instance = XNEW (struct _slp_instance);
2093 SLP_INSTANCE_TREE (new_instance) = node;
2094 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2095 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2096 SLP_INSTANCE_LOADS (new_instance) = loads;
2098 /* Compute the load permutation. */
2099 slp_tree load_node;
2100 bool loads_permuted = false;
2101 FOR_EACH_VEC_ELT (loads, i, load_node)
2103 vec<unsigned> load_permutation;
2104 int j;
2105 gimple *load, *first_stmt;
2106 bool this_load_permuted = false;
2107 load_permutation.create (group_size);
2108 first_stmt = GROUP_FIRST_ELEMENT
2109 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2110 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2112 int load_place = vect_get_place_in_interleaving_chain
2113 (load, first_stmt);
2114 gcc_assert (load_place != -1);
2115 if (load_place != j)
2116 this_load_permuted = true;
2117 load_permutation.safe_push (load_place);
2119 if (!this_load_permuted
2120 /* The load requires permutation when unrolling exposes
2121 a gap either because the group is larger than the SLP
2122 group-size or because there is a gap between the groups. */
2123 && (known_eq (unrolling_factor, 1U)
2124 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2125 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2127 load_permutation.release ();
2128 continue;
2130 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2131 loads_permuted = true;
2134 if (loads_permuted)
2136 if (!vect_supported_load_permutation_p (new_instance))
2138 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141 "Build SLP failed: unsupported load "
2142 "permutation ");
2143 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2144 TDF_SLIM, stmt, 0);
2146 vect_free_slp_instance (new_instance);
2147 return false;
2151 /* If the loads and stores can be handled with load/store-lanes
2152 instructions do not generate this SLP instance. */
2153 if (is_a <loop_vec_info> (vinfo)
2154 && loads_permuted
2155 && dr && vect_store_lanes_supported (vectype, group_size))
2157 slp_tree load_node;
2158 FOR_EACH_VEC_ELT (loads, i, load_node)
2160 gimple *first_stmt = GROUP_FIRST_ELEMENT
2161 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2162 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2163 /* Use SLP for strided accesses (or if we
2164 can't use load-lanes). */
2165 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2166 || ! vect_load_lanes_supported
2167 (STMT_VINFO_VECTYPE (stmt_vinfo),
2168 GROUP_SIZE (stmt_vinfo)))
2169 break;
2171 if (i == loads.length ())
2173 if (dump_enabled_p ())
2174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2175 "Built SLP cancelled: can use "
2176 "load/store-lanes\n");
2177 vect_free_slp_instance (new_instance);
2178 return false;
2182 vinfo->slp_instances.safe_push (new_instance);
2184 if (dump_enabled_p ())
2186 dump_printf_loc (MSG_NOTE, vect_location,
2187 "Final SLP tree for instance:\n");
2188 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2191 return true;
2194 else
2196 /* Failed to SLP. */
2197 /* Free the allocated memory. */
2198 scalar_stmts.release ();
2199 loads.release ();
2202 /* For basic block SLP, try to break the group up into multiples of the
2203 vector size. */
2204 if (is_a <bb_vec_info> (vinfo)
2205 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2206 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2208 /* We consider breaking the group only on VF boundaries from the existing
2209 start. */
2210 for (i = 0; i < group_size; i++)
2211 if (!matches[i]) break;
2213 if (i >= nunits && i < group_size)
2215 /* Split into two groups at the first vector boundary before i. */
2216 gcc_assert ((nunits & (nunits - 1)) == 0);
2217 unsigned group1_size = i & ~(nunits - 1);
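/* NUNITS is a power of two, so i & ~(nunits - 1) rounds I down to a
   vector boundary; e.g. with nunits == 4 a first mismatch at i == 6
   gives group1_size == 6 & ~3 == 4.  */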
2219 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2220 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2221 /* If the first non-match was in the middle of a vector,
2222 skip the rest of that vector. */
2223 if (group1_size < i)
2225 i = group1_size + nunits;
2226 if (i < group_size)
2227 rest = vect_split_slp_store_group (rest, nunits);
2229 if (i < group_size)
2230 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2231 return res;
2233 /* Even though the first vector did not all match, we might be able to SLP
2234 (some of) the remainder. FORNOW ignore this possibility. */
2237 return false;
2241 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2242 trees of packed scalar stmts if SLP is possible. */
2244 bool
2245 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2247 unsigned int i;
2248 gimple *first_element;
2250 if (dump_enabled_p ())
2251 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2253 /* Find SLP sequences starting from groups of grouped stores. */
2254 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2255 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2257 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2259 if (loop_vinfo->reduction_chains.length () > 0)
2261 /* Find SLP sequences starting from reduction chains. */
2262 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2263 if (! vect_analyze_slp_instance (vinfo, first_element,
2264 max_tree_size))
2266 /* Dissolve reduction chain group. */
2267 gimple *next, *stmt = first_element;
2268 while (stmt)
2270 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2271 next = GROUP_NEXT_ELEMENT (vinfo);
2272 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2273 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2274 stmt = next;
2276 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2277 = vect_internal_def;
2281 /* Find SLP sequences starting from groups of reductions. */
2282 if (loop_vinfo->reductions.length () > 1)
2283 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2284 max_tree_size);
2287 return true;
2291 /* For each possible SLP instance decide whether to SLP it and calculate the
2292 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2293 to SLP at least one instance. */
2295 bool
2296 vect_make_slp_decision (loop_vec_info loop_vinfo)
2298 unsigned int i;
2299 poly_uint64 unrolling_factor = 1;
2300 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2301 slp_instance instance;
2302 int decided_to_slp = 0;
2304 if (dump_enabled_p ())
2305 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2306 "\n");
2308 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2310 /* FORNOW: SLP if you can. */
2311 /* All unroll factors have the form current_vector_size * X for some
2312 rational X, so they must have a common multiple. */
2313 unrolling_factor
2314 = force_common_multiple (unrolling_factor,
2315 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2317 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2318 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2319 loop-based vectorization. Such stmts will be marked as HYBRID. */
2320 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2321 decided_to_slp++;
2324 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2326 if (decided_to_slp && dump_enabled_p ())
2328 dump_printf_loc (MSG_NOTE, vect_location,
2329 "Decided to SLP %d instances. Unrolling factor ",
2330 decided_to_slp);
2331 dump_dec (MSG_NOTE, unrolling_factor);
2332 dump_printf (MSG_NOTE, "\n");
2335 return (decided_to_slp > 0);
2339 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2340 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2342 static void
2343 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2345 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2346 imm_use_iterator imm_iter;
2347 gimple *use_stmt;
2348 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2349 slp_tree child;
2350 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2351 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2352 int j;
2354 /* Propagate hybrid down the SLP tree. */
2355 if (stype == hybrid)
2357 else if (HYBRID_SLP_STMT (stmt_vinfo))
2358 stype = hybrid;
2359 else
2361 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2362 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2363 /* If we get a pattern stmt here we have to use the LHS of the
2364 original stmt for immediate uses. */
2365 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2366 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2367 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2368 tree def;
2369 if (gimple_code (stmt) == GIMPLE_PHI)
2370 def = gimple_phi_result (stmt);
2371 else
2372 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2373 if (def)
2374 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2376 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2377 continue;
2378 use_vinfo = vinfo_for_stmt (use_stmt);
2379 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2380 && STMT_VINFO_RELATED_STMT (use_vinfo))
2381 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2382 if (!STMT_SLP_TYPE (use_vinfo)
2383 && (STMT_VINFO_RELEVANT (use_vinfo)
2384 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2385 && !(gimple_code (use_stmt) == GIMPLE_PHI
2386 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2388 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2391 "def in non-SLP stmt: ");
2392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2394 stype = hybrid;
2399 if (stype == hybrid
2400 && !HYBRID_SLP_STMT (stmt_vinfo))
2402 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2405 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2407 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2410 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2411 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2412 vect_detect_hybrid_slp_stmts (child, i, stype);
2415 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2417 static tree
2418 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2420 walk_stmt_info *wi = (walk_stmt_info *)data;
2421 struct loop *loopp = (struct loop *)wi->info;
2423 if (wi->is_lhs)
2424 return NULL_TREE;
2426 if (TREE_CODE (*tp) == SSA_NAME
2427 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2429 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2430 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2431 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2433 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2438 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2442 return NULL_TREE;
2445 static tree
2446 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2447 walk_stmt_info *)
2449 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2450 /* If the stmt is in a SLP instance then this isn't a reason
2451 to mark use definitions in other SLP instances as hybrid. */
2452 if (! STMT_SLP_TYPE (use_vinfo)
2453 && (STMT_VINFO_RELEVANT (use_vinfo)
2454 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2455 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2456 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2458 else
2459 *handled = true;
2460 return NULL_TREE;
2463 /* Find stmts that must be both vectorized and SLPed. */
2465 void
2466 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2468 unsigned int i;
2469 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2470 slp_instance instance;
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2474 "\n");
2476 /* First walk all pattern stmts in the loop and mark the defs of their
2477 uses as hybrid, because immediate uses inside pattern stmts are not recorded. */
2478 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2480 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2481 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2482 gsi_next (&gsi))
2484 gimple *stmt = gsi_stmt (gsi);
2485 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2486 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2488 walk_stmt_info wi;
2489 memset (&wi, 0, sizeof (wi));
2490 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2491 gimple_stmt_iterator gsi2
2492 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2493 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2494 vect_detect_hybrid_slp_1, &wi);
2495 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2496 vect_detect_hybrid_slp_2,
2497 vect_detect_hybrid_slp_1, &wi);
2502 /* Then walk the SLP instance trees marking stmts with uses in
2503 non-SLP stmts as hybrid, also propagating hybrid down the
2504 SLP tree, collecting the above info on-the-fly. */
2505 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2507 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2508 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2509 i, pure_slp);
2514 /* Initialize a bb_vec_info struct for the statements between
2515 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2517 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2518 gimple_stmt_iterator region_end_in)
2519 : vec_info (vec_info::bb, init_cost (NULL)),
2520 bb (gsi_bb (region_begin_in)),
2521 region_begin (region_begin_in),
2522 region_end (region_end_in)
2524 gimple_stmt_iterator gsi;
2526 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2527 gsi_next (&gsi))
2529 gimple *stmt = gsi_stmt (gsi);
2530 gimple_set_uid (stmt, 0);
2531 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2534 bb->aux = this;
2538 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2539 stmts in the basic block. */
2541 _bb_vec_info::~_bb_vec_info ()
2543 for (gimple_stmt_iterator si = region_begin;
2544 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2546 gimple *stmt = gsi_stmt (si);
2547 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2549 if (stmt_info)
2550 /* Free stmt_vec_info. */
2551 free_stmt_vec_info (stmt);
2553 /* Reset region marker. */
2554 gimple_set_uid (stmt, -1);
2557 bb->aux = NULL;
2561 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2562 the subtree. NODE_INSTANCE is the SLP instance that contains NODE.
2564 Return true if the operations are supported. */
2566 static bool
2567 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2568 slp_instance node_instance)
2570 bool dummy;
2571 int i, j;
2572 gimple *stmt;
2573 slp_tree child;
2575 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2576 return true;
2578 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2579 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2580 return false;
2582 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2584 gcc_assert (stmt_info);
2585 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2587 /* For BB vectorization vector types are assigned here.
2588 Memory accesses already got their vector type assigned
2589 in vect_analyze_data_refs. */
2590 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2591 if (bb_vinfo
2592 && ! STMT_VINFO_DATA_REF (stmt_info))
2594 gcc_assert (PURE_SLP_STMT (stmt_info));
2596 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2597 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_NOTE, vect_location,
2600 "get vectype for scalar type: ");
2601 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2602 dump_printf (MSG_NOTE, "\n");
2605 tree vectype = get_vectype_for_scalar_type (scalar_type);
2606 if (!vectype)
2608 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "not SLPed: unsupported data-type ");
2612 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2613 scalar_type);
2614 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2616 return false;
2619 if (dump_enabled_p ())
2621 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2622 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2623 dump_printf (MSG_NOTE, "\n");
2626 gimple *sstmt;
2627 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2628 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2631 /* Calculate the number of vector statements to be created for the
2632 scalar stmts in this node. For SLP reductions it is equal to the
2633 number of vector statements in the children (which has already been
2634 calculated by the recursive call). Otherwise it is the number of
2635 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2636 VF divided by the number of elements in a vector. */
2637 if (GROUP_FIRST_ELEMENT (stmt_info)
2638 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2639 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2640 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2641 else
2643 poly_uint64 vf;
2644 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2645 vf = loop_vinfo->vectorization_factor;
2646 else
2647 vf = 1;
2648 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2649 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2650 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2651 = vect_get_num_vectors (vf * group_size, vectype);
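/* For example (hypothetical values): VF == 4, GROUP_SIZE == 2 and a
   vector type holding 8 elements give 4 * 2 / 8 == 1 vector statement
   for the node.  */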
2654 /* Push SLP node def-type to stmt operands. */
2655 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2656 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2657 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2658 = SLP_TREE_DEF_TYPE (child);
2659 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2660 /* Restore def-types. */
2661 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2662 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2663 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2664 = vect_internal_def;
2665 if (! res)
2666 return false;
2668 return true;
2672 /* Analyze statements in SLP instances of VINFO. Return true if the
2673 operations are supported. */
2675 bool
2676 vect_slp_analyze_operations (vec_info *vinfo)
2678 slp_instance instance;
2679 int i;
2681 if (dump_enabled_p ())
2682 dump_printf_loc (MSG_NOTE, vect_location,
2683 "=== vect_slp_analyze_operations ===\n");
2685 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2687 if (!vect_slp_analyze_node_operations (vinfo,
2688 SLP_INSTANCE_TREE (instance),
2689 instance))
2691 dump_printf_loc (MSG_NOTE, vect_location,
2692 "removing SLP instance operations starting from: ");
2693 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2694 SLP_TREE_SCALAR_STMTS
2695 (SLP_INSTANCE_TREE (instance))[0], 0);
2696 vect_free_slp_instance (instance);
2697 vinfo->slp_instances.ordered_remove (i);
2699 else
2701 /* Compute the costs of the SLP instance. */
2702 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2703 i++;
2707 return !vinfo->slp_instances.is_empty ();
2711 /* Compute the scalar cost of the SLP node NODE and its children
2712 and return it. Do not account defs that are marked in LIFE and
2713 update LIFE according to uses of NODE. */
2715 static unsigned
2716 vect_bb_slp_scalar_cost (basic_block bb,
2717 slp_tree node, vec<bool, va_heap> *life)
2719 unsigned scalar_cost = 0;
2720 unsigned i;
2721 gimple *stmt;
2722 slp_tree child;
2724 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2726 unsigned stmt_cost;
2727 ssa_op_iter op_iter;
2728 def_operand_p def_p;
2729 stmt_vec_info stmt_info;
2731 if ((*life)[i])
2732 continue;
2734 /* If there is a non-vectorized use of the defs then the scalar
2735 stmt is kept live in which case we do not account it or any
2736 required defs in the SLP children in the scalar cost. This
2737 way we make the vectorization more costly when compared to
2738 the scalar cost. */
2739 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2741 imm_use_iterator use_iter;
2742 gimple *use_stmt;
2743 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2744 if (!is_gimple_debug (use_stmt)
2745 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2746 use_stmt)
2747 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2749 (*life)[i] = true;
2750 BREAK_FROM_IMM_USE_STMT (use_iter);
2753 if ((*life)[i])
2754 continue;
2756 /* Count scalar stmts only once. */
2757 if (gimple_visited_p (stmt))
2758 continue;
2759 gimple_set_visited (stmt, true);
2761 stmt_info = vinfo_for_stmt (stmt);
2762 if (STMT_VINFO_DATA_REF (stmt_info))
2764 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2765 stmt_cost = vect_get_stmt_cost (scalar_load);
2766 else
2767 stmt_cost = vect_get_stmt_cost (scalar_store);
2769 else
2770 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2772 scalar_cost += stmt_cost;
2775 auto_vec<bool, 20> subtree_life;
2776 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2778 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2780 /* Do not directly pass LIFE to the recursive call, copy it to
2781 confine changes in the callee to the current child/subtree. */
2782 subtree_life.safe_splice (*life);
2783 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2784 subtree_life.truncate (0);
2788 return scalar_cost;
2791 /* Check if vectorization of the basic block is profitable. */
2793 static bool
2794 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2796 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2797 slp_instance instance;
2798 int i;
2799 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2800 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2802 /* Calculate scalar cost. */
2803 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2805 auto_vec<bool, 20> life;
2806 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2807 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2808 SLP_INSTANCE_TREE (instance),
2809 &life);
2812 /* Unset visited flag. */
2813 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2814 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2815 gimple_set_visited (gsi_stmt (gsi), false);
2817 /* Complete the target-specific cost calculation. */
2818 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2819 &vec_inside_cost, &vec_epilogue_cost);
2821 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2823 if (dump_enabled_p ())
2825 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2826 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2827 vec_inside_cost);
2828 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2829 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2830 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2833 /* Vectorization is profitable if its cost is less than the cost of the
2834 scalar version. Note that we err on the vector side for equal cost because
2835 the cost estimate is otherwise quite pessimistic (constant uses are
2836 free on the scalar side but cost a load on the vector side for
2837 example). */
2838 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2839 return false;
2841 return true;
2844 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2845 if so and sets FATAL to true if failure is independent of
2846 current_vector_size. */
2848 static bb_vec_info
2849 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2850 gimple_stmt_iterator region_end,
2851 vec<data_reference_p> datarefs, int n_stmts,
2852 bool &fatal)
2854 bb_vec_info bb_vinfo;
2855 slp_instance instance;
2856 int i;
2857 poly_uint64 min_vf = 2;
2859 /* The first group of checks is independent of the vector size. */
2860 fatal = true;
2862 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2864 if (dump_enabled_p ())
2865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2866 "not vectorized: too many instructions in "
2867 "basic block.\n");
2868 free_data_refs (datarefs);
2869 return NULL;
2872 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2873 if (!bb_vinfo)
2874 return NULL;
2876 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2878 /* Analyze the data references. */
2880 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2882 if (dump_enabled_p ())
2883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2884 "not vectorized: unhandled data-ref in basic "
2885 "block.\n");
2887 delete bb_vinfo;
2888 return NULL;
2891 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2893 if (dump_enabled_p ())
2894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2895 "not vectorized: not enough data-refs in "
2896 "basic block.\n");
2898 delete bb_vinfo;
2899 return NULL;
2902 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2906 "not vectorized: unhandled data access in "
2907 "basic block.\n");
2909 delete bb_vinfo;
2910 return NULL;
2913 /* If there are no grouped stores in the region there is no need
2914 to continue with pattern recog as vect_analyze_slp will fail
2915 anyway. */
2916 if (bb_vinfo->grouped_stores.is_empty ())
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2920 "not vectorized: no grouped stores in "
2921 "basic block.\n");
2923 delete bb_vinfo;
2924 return NULL;
2927 /* The rest of the analysis below depends on the vector size in some way, so from here on a failure is no longer fatal. */
2928 fatal = false;
2930 vect_pattern_recog (bb_vinfo);
2932 /* Check the SLP opportunities in the basic block, analyze and build SLP
2933 trees. */
2934 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2936 if (dump_enabled_p ())
2938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2939 "Failed to SLP the basic block.\n");
2940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2941 "not vectorized: failed to find SLP opportunities "
2942 "in basic block.\n");
2945 delete bb_vinfo;
2946 return NULL;
2949 vect_record_base_alignments (bb_vinfo);
2951 /* Analyze and verify the alignment of data references and the
2952 dependence in the SLP instances. */
2953 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2955 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2956 || ! vect_slp_analyze_instance_dependence (instance))
2958 dump_printf_loc (MSG_NOTE, vect_location,
2959 "removing SLP instance operations starting from: ");
2960 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2961 SLP_TREE_SCALAR_STMTS
2962 (SLP_INSTANCE_TREE (instance))[0], 0);
2963 vect_free_slp_instance (instance);
2964 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2965 continue;
2968 /* Mark all the statements that we want to vectorize as pure SLP and
2969 relevant. */
2970 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2971 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2973 i++;
2975 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2977 delete bb_vinfo;
2978 return NULL;
2981 if (!vect_slp_analyze_operations (bb_vinfo))
2983 if (dump_enabled_p ())
2984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2985 "not vectorized: bad operation in basic block.\n");
2987 delete bb_vinfo;
2988 return NULL;
2991 /* Cost model: check if the vectorization is worthwhile. */
2992 if (!unlimited_cost_model (NULL)
2993 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2995 if (dump_enabled_p ())
2996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2997 "not vectorized: vectorization is not "
2998 "profitable.\n");
3000 delete bb_vinfo;
3001 return NULL;
3004 if (dump_enabled_p ())
3005 dump_printf_loc (MSG_NOTE, vect_location,
3006 "Basic block will be vectorized using SLP\n");
3008 return bb_vinfo;
3012 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3013 true if anything in the basic-block was vectorized. */
3015 bool
3016 vect_slp_bb (basic_block bb)
3018 bb_vec_info bb_vinfo;
3019 gimple_stmt_iterator gsi;
3020 unsigned int vector_sizes;
3021 bool any_vectorized = false;
3023 if (dump_enabled_p ())
3024 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
3026 /* Autodetect first vector size we try. */
3027 current_vector_size = 0;
3028 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3030 gsi = gsi_start_bb (bb);
3032 while (1)
3034 if (gsi_end_p (gsi))
3035 break;
3037 gimple_stmt_iterator region_begin = gsi;
3038 vec<data_reference_p> datarefs = vNULL;
3039 int insns = 0;
3041 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3043 gimple *stmt = gsi_stmt (gsi);
3044 if (is_gimple_debug (stmt))
3045 continue;
3046 insns++;
3048 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3049 vect_location = gimple_location (stmt);
3051 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3052 break;
3055 /* Skip leading unhandled stmts. */
3056 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3058 gsi_next (&gsi);
3059 continue;
3062 gimple_stmt_iterator region_end = gsi;
3064 bool vectorized = false;
3065 bool fatal = false;
3066 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3067 datarefs, insns, fatal);
3068 if (bb_vinfo
3069 && dbg_cnt (vect_slp))
3071 if (dump_enabled_p ())
3072 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3074 vect_schedule_slp (bb_vinfo);
3076 if (dump_enabled_p ())
3077 dump_printf_loc (MSG_NOTE, vect_location,
3078 "basic block part vectorized\n");
3080 vectorized = true;
3082 delete bb_vinfo;
3084 any_vectorized |= vectorized;
3086 vector_sizes &= ~current_vector_size;
3087 if (vectorized
3088 || vector_sizes == 0
3089 || current_vector_size == 0
3090 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3091 vector sizes will fail do not bother iterating. */
3092 || fatal)
3094 if (gsi_end_p (region_end))
3095 break;
3097 /* Skip the unhandled stmt. */
3098 gsi_next (&gsi);
3100 /* And reset vector sizes. */
3101 current_vector_size = 0;
3102 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3104 else
3106 /* Try the next biggest vector size. */
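/* VECTOR_SIZES is a bitmask of the remaining candidate sizes in bytes,
   so 1 << floor_log2 picks the largest one; e.g. a mask of 32 | 16
   selects 32 now and leaves 16 for the next retry.  */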
3107 current_vector_size = 1 << floor_log2 (vector_sizes);
3108 if (dump_enabled_p ())
3109 dump_printf_loc (MSG_NOTE, vect_location,
3110 "***** Re-trying analysis with "
3111 "vector size %d\n", current_vector_size);
3113 /* Start over. */
3114 gsi = region_begin;
3118 return any_vectorized;
3122 /* Return true if the vector type of the boolean constant that is the
3123 OPNUM operand of statement STMT is a boolean vector. */
3125 static bool
3126 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3128 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3129 enum tree_code code = gimple_expr_code (stmt);
3130 tree op, vectype;
3131 gimple *def_stmt;
3132 enum vect_def_type dt;
3134 /* For comparison and COND_EXPR type is chosen depending
3135 on the other comparison operand. */
3136 if (TREE_CODE_CLASS (code) == tcc_comparison)
3138 if (opnum)
3139 op = gimple_assign_rhs1 (stmt);
3140 else
3141 op = gimple_assign_rhs2 (stmt);
3143 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3144 &dt, &vectype))
3145 gcc_unreachable ();
3147 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3150 if (code == COND_EXPR)
3152 tree cond = gimple_assign_rhs1 (stmt);
3154 if (TREE_CODE (cond) == SSA_NAME)
3155 op = cond;
3156 else if (opnum)
3157 op = TREE_OPERAND (cond, 1);
3158 else
3159 op = TREE_OPERAND (cond, 0);
3161 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3162 &dt, &vectype))
3163 gcc_unreachable ();
3165 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3168 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3172 /* For constant and loop invariant defs of SLP_NODE this function returns
3173 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3174 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3175 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3179 static void
3180 vect_get_constant_vectors (tree op, slp_tree slp_node,
3181 vec<tree> *vec_oprnds,
3182 unsigned int op_num, unsigned int number_of_vectors)
3184 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3185 gimple *stmt = stmts[0];
3186 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3187 unsigned nunits;
3188 tree vec_cst;
3189 unsigned j, number_of_places_left_in_vector;
3190 tree vector_type;
3191 tree vop;
3192 int group_size = stmts.length ();
3193 unsigned int vec_num, i;
3194 unsigned number_of_copies = 1;
3195 vec<tree> voprnds;
3196 voprnds.create (number_of_vectors);
3197 bool constant_p, is_store;
3198 tree neutral_op = NULL;
3199 enum tree_code code = gimple_expr_code (stmt);
3200 gimple_seq ctor_seq = NULL;
3202 /* Check if vector type is a boolean vector. */
3203 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3204 && vect_mask_constant_operand_p (stmt, op_num))
3205 vector_type
3206 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3207 else
3208 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3209 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3211 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3213 is_store = true;
3214 op = gimple_assign_rhs1 (stmt);
3216 else
3217 is_store = false;
3219 gcc_assert (op);
3221 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3222 created vectors. It is greater than 1 if unrolling is performed.
3224 For example, we have two scalar operands, s1 and s2 (e.g., group of
3225 strided accesses of size two), while NUNITS is four (i.e., four scalars
3226 of this type can be packed in a vector). The output vector will contain
3227 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3228 will be 2).
3230 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3231 containing the operands.
3233 For example, NUNITS is four as before, and the group size is 8
3234 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3235 {s5, s6, s7, s8}. */
3237 number_of_copies = nunits * number_of_vectors / group_size;
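/* Tying this to the example above: nunits == 4, number_of_vectors == 1
   and group_size == 2 give 4 * 1 / 2 == 2 copies, producing the single
   vector {s1, s2, s1, s2}.  */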
3239 number_of_places_left_in_vector = nunits;
3240 constant_p = true;
3241 tree_vector_builder elts (vector_type, nunits, 1);
3242 elts.quick_grow (nunits);
3243 bool place_after_defs = false;
3244 for (j = 0; j < number_of_copies; j++)
3246 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3248 if (is_store)
3249 op = gimple_assign_rhs1 (stmt);
3250 else
3252 switch (code)
3254 case COND_EXPR:
3256 tree cond = gimple_assign_rhs1 (stmt);
3257 if (TREE_CODE (cond) == SSA_NAME)
3258 op = gimple_op (stmt, op_num + 1);
3259 else if (op_num == 0 || op_num == 1)
3260 op = TREE_OPERAND (cond, op_num);
3261 else
3263 if (op_num == 2)
3264 op = gimple_assign_rhs2 (stmt);
3265 else
3266 op = gimple_assign_rhs3 (stmt);
3269 break;
3271 case CALL_EXPR:
3272 op = gimple_call_arg (stmt, op_num);
3273 break;
3275 case LSHIFT_EXPR:
3276 case RSHIFT_EXPR:
3277 case LROTATE_EXPR:
3278 case RROTATE_EXPR:
3279 op = gimple_op (stmt, op_num + 1);
3280 /* Unlike the other binary operators, shifts/rotates take
3281 an int shift count instead of one with the same type as
3282 the lhs, so make sure the scalar is the right type if
3283 we are dealing with vectors of
3284 long long/long/short/char. */
3285 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3286 op = fold_convert (TREE_TYPE (vector_type), op);
3287 break;
3289 default:
3290 op = gimple_op (stmt, op_num + 1);
3291 break;
3295 /* Create 'vect_ = {op0,op1,...,opn}'. */
3296 number_of_places_left_in_vector--;
3297 tree orig_op = op;
3298 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3300 if (CONSTANT_CLASS_P (op))
3302 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3304 /* Can't use VIEW_CONVERT_EXPR for booleans because
3305 of possibly different sizes of scalar value and
3306 vector element. */
3307 if (integer_zerop (op))
3308 op = build_int_cst (TREE_TYPE (vector_type), 0);
3309 else if (integer_onep (op))
3310 op = build_all_ones_cst (TREE_TYPE (vector_type));
3311 else
3312 gcc_unreachable ();
3314 else
3315 op = fold_unary (VIEW_CONVERT_EXPR,
3316 TREE_TYPE (vector_type), op);
3317 gcc_assert (op && CONSTANT_CLASS_P (op));
3319 else
3321 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3322 gimple *init_stmt;
3323 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3325 tree true_val
3326 = build_all_ones_cst (TREE_TYPE (vector_type));
3327 tree false_val
3328 = build_zero_cst (TREE_TYPE (vector_type));
3329 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3330 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3331 op, true_val,
3332 false_val);
3334 else
3336 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3337 op);
3338 init_stmt
3339 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3340 op);
3342 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3343 op = new_temp;
3346 elts[number_of_places_left_in_vector] = op;
3347 if (!CONSTANT_CLASS_P (op))
3348 constant_p = false;
3349 if (TREE_CODE (orig_op) == SSA_NAME
3350 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3351 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3352 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3353 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3354 place_after_defs = true;
3356 if (number_of_places_left_in_vector == 0)
3358 if (constant_p)
3359 vec_cst = elts.build ();
3360 else
3362 vec<constructor_elt, va_gc> *v;
3363 unsigned k;
3364 vec_alloc (v, nunits);
3365 for (k = 0; k < nunits; ++k)
3366 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3367 vec_cst = build_constructor (vector_type, v);
3369 tree init;
3370 gimple_stmt_iterator gsi;
3371 if (place_after_defs)
3373 gsi = gsi_for_stmt
3374 (vect_find_last_scalar_stmt_in_slp (slp_node));
3375 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3377 else
3378 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3379 if (ctor_seq != NULL)
3381 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3382 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3383 GSI_SAME_STMT);
3384 ctor_seq = NULL;
3386 voprnds.quick_push (init);
3387 place_after_defs = false;
3388 number_of_places_left_in_vector = nunits;
3389 constant_p = true;
3390 elts.new_vector (vector_type, nunits, 1);
3391 elts.quick_grow (nunits);
3396 /* Since the vectors were created in reverse order, reverse them here
3397 so that VEC_OPRNDS receives them in the original order. */
3398 vec_num = voprnds.length ();
3399 for (j = vec_num; j != 0; j--)
3401 vop = voprnds[j - 1];
3402 vec_oprnds->quick_push (vop);
3405 voprnds.release ();
3407 /* If VF is greater than the unrolling factor needed for the SLP
3408 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3409 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3410 to replicate the vectors. */
3411 while (number_of_vectors > vec_oprnds->length ())
3413 tree neutral_vec = NULL;
3415 if (neutral_op)
3417 if (!neutral_vec)
3418 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3420 vec_oprnds->quick_push (neutral_vec);
3422 else
3424 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3425 vec_oprnds->quick_push (vop);
3431 /* Get the vectorized definitions from SLP_NODE, which contains the
3432 corresponding vectorized def-stmts. */
3434 static void
3435 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3437 tree vec_oprnd;
3438 gimple *vec_def_stmt;
3439 unsigned int i;
3441 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3443 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3445 gcc_assert (vec_def_stmt);
3446 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3447 vec_oprnd = gimple_phi_result (vec_def_stmt);
3448 else
3449 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3450 vec_oprnds->quick_push (vec_oprnd);
3455 /* Get vectorized definitions for SLP_NODE.
3456 If the scalar definitions are loop invariants or constants, collect them and
3457 call vect_get_constant_vectors() to create vector stmts.
3458 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3459 must be stored in the corresponding child of SLP_NODE, and we call
3460 vect_get_slp_vect_defs () to retrieve them. */
3462 void
3463 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3464 vec<vec<tree> > *vec_oprnds)
3466 gimple *first_stmt;
3467 int number_of_vects = 0, i;
3468 unsigned int child_index = 0;
3469 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3470 slp_tree child = NULL;
3471 vec<tree> vec_defs;
3472 tree oprnd;
3473 bool vectorized_defs;
3475 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3476 FOR_EACH_VEC_ELT (ops, i, oprnd)
3478 /* For each operand we check if it has vectorized definitions in a child
3479 node or we need to create them (for invariants and constants). We
3480 check if the LHS of the first stmt of the next child matches OPRND.
3481 If it does, we found the correct child. Otherwise, we call
3482 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
3483 this child node is checked again for the next operand. */
3484 vectorized_defs = false;
3485 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3487 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3489 /* We have to check both pattern and original def, if available. */
3490 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3492 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3493 gimple *related
3494 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3495 tree first_def_op;
3497 if (gimple_code (first_def) == GIMPLE_PHI)
3498 first_def_op = gimple_phi_result (first_def);
3499 else
3500 first_def_op = gimple_get_lhs (first_def);
3501 if (operand_equal_p (oprnd, first_def_op, 0)
3502 || (related
3503 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3505 /* The number of vector defs is determined by the number of
3506 vector statements in the node from which we get those
3507 statements. */
3508 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3509 vectorized_defs = true;
3510 child_index++;
3513 else
3514 child_index++;
3517 if (!vectorized_defs)
3519 if (i == 0)
3521 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3522 /* Number of vector stmts was calculated according to LHS in
3523 vect_schedule_slp_instance (), fix it by replacing LHS with
3524 RHS, if necessary. See vect_get_smallest_scalar_type () for
3525 details. */
3526 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3527 &rhs_size_unit);
3528 if (rhs_size_unit != lhs_size_unit)
3530 number_of_vects *= rhs_size_unit;
3531 number_of_vects /= lhs_size_unit;
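/* E.g. (illustrative types): if the LHS is a 4-byte int but the RHS
   elements are 8-byte values, twice as many vectors are needed to hold
   the RHS operands, so NUMBER_OF_VECTS doubles here.  */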
3536 /* Allocate memory for vectorized defs. */
3537 vec_defs = vNULL;
3538 vec_defs.create (number_of_vects);
3540 /* For reduction defs we call vect_get_constant_vectors (), since we are
3541 looking for initial loop invariant values. */
3542 if (vectorized_defs)
3543 /* The defs are already vectorized. */
3544 vect_get_slp_vect_defs (child, &vec_defs);
3545 else
3546 /* Build vectors from scalar defs. */
3547 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3548 number_of_vects);
3550 vec_oprnds->quick_push (vec_defs);
3554 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3555 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3556 permute statements for the SLP node NODE of the SLP instance
3557 SLP_NODE_INSTANCE. */
3559 bool
3560 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3561 gimple_stmt_iterator *gsi, poly_uint64 vf,
3562 slp_instance slp_node_instance, bool analyze_only,
3563 unsigned *n_perms)
3565 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3567 tree mask_element_type = NULL_TREE, mask_type;
3568 int nunits, vec_index = 0;
3569 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3570 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3571 int mask_element;
3572 machine_mode mode;
3573 unsigned HOST_WIDE_INT const_vf;
3575 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3576 return false;
3578 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3580 mode = TYPE_MODE (vectype);
3582 /* At the moment, all permutations are represented using per-element
3583 indices, so we can't cope with variable vectorization factors. */
3584 if (!vf.is_constant (&const_vf))
3585 return false;
3587 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3588 same size as the vector element being permuted. */
3589 mask_element_type = lang_hooks.types.type_for_mode
3590 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3591 mask_type = get_vectype_for_scalar_type (mask_element_type);
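/* For instance, permuting a vector of 32-bit floats uses a 32-bit
   integer mask element type, i.e. a V4SI-style mask for a V4SF vector
   (modes here are only an illustration).  */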
3592 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3593 vec_perm_builder mask (nunits, nunits, 1);
3594 mask.quick_grow (nunits);
3595 vec_perm_indices indices;
3597 /* Initialize the vect stmts of NODE to properly insert the generated
3598 stmts later. */
3599 if (! analyze_only)
3600 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3601 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3602 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3604 /* Generate permutation masks for every NODE. Number of masks for each NODE
3605 is equal to GROUP_SIZE.
3606 E.g., we have a group of three nodes with three loads from the same
3607 location in each node, and the vector size is 4. I.e., we have an
3608 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3609 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3610 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3613 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3614 The last mask is illegal since we assume two operands for the permute
3615 operation, and the mask element values can't be outside that range.
3616 Hence, the last mask must be converted into {2,5,5,5}.
3617 For the first two permutations we need the first and the second input
3618 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3619 we need the second and the third vectors: {b1,c1,a2,b2} and
3620 {c2,a3,b3,c3}. */
3622 int vect_stmts_counter = 0;
3623 int index = 0;
3624 int first_vec_index = -1;
3625 int second_vec_index = -1;
3626 bool noop_p = true;
3627 *n_perms = 0;
3629 for (unsigned int j = 0; j < const_vf; j++)
3631 for (int k = 0; k < group_size; k++)
3633 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3634 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3635 vec_index = i / nunits;
3636 mask_element = i % nunits;
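/* Continuing the a's example above: there the a-node's load permutation
   would be {0, 0, 0} and STMT_VINFO_GROUP_SIZE == 3, so the first mask
   is filled from the (j, k) pairs (0,0) (0,1) (0,2) (1,0), giving
   i == 0, 0, 0, 3 and hence the mask {0,0,0,3}.  */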
3637 if (vec_index == first_vec_index
3638 || first_vec_index == -1)
3640 first_vec_index = vec_index;
3642 else if (vec_index == second_vec_index
3643 || second_vec_index == -1)
3645 second_vec_index = vec_index;
3646 mask_element += nunits;
3648 else
3650 if (dump_enabled_p ())
3652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3653 "permutation requires at "
3654 "least three vectors ");
3655 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3656 stmt, 0);
3658 gcc_assert (analyze_only);
3659 return false;
3662 gcc_assert (mask_element >= 0
3663 && mask_element < 2 * nunits);
3664 if (mask_element != index)
3665 noop_p = false;
3666 mask[index++] = mask_element;
3668 if (index == nunits && !noop_p)
3670 indices.new_vector (mask, 2, nunits);
3671 if (!can_vec_perm_const_p (mode, indices))
3673 if (dump_enabled_p ())
3675 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3676 vect_location,
3677 "unsupported vect permute { ");
3678 for (i = 0; i < nunits; ++i)
3679 dump_printf (MSG_MISSED_OPTIMIZATION,
3680 HOST_WIDE_INT_PRINT_DEC " ", mask[i]);
3681 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3683 gcc_assert (analyze_only);
3684 return false;
3687 ++*n_perms;
3690 if (index == nunits)
3692 if (!analyze_only)
3694 tree mask_vec = NULL_TREE;
3696 if (! noop_p)
3697 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
3699 if (second_vec_index == -1)
3700 second_vec_index = first_vec_index;
3702 /* Generate the permute statement if necessary. */
3703 tree first_vec = dr_chain[first_vec_index];
3704 tree second_vec = dr_chain[second_vec_index];
3705 gimple *perm_stmt;
3706 if (! noop_p)
3708 tree perm_dest
3709 = vect_create_destination_var (gimple_assign_lhs (stmt),
3710 vectype);
3711 perm_dest = make_ssa_name (perm_dest);
3712 perm_stmt = gimple_build_assign (perm_dest,
3713 VEC_PERM_EXPR,
3714 first_vec, second_vec,
3715 mask_vec);
3716 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3718 else
3719 /* If mask was NULL_TREE generate the requested
3720 identity transform. */
3721 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3723 /* Store the vector statement in NODE. */
3724 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3727 index = 0;
3728 first_vec_index = -1;
3729 second_vec_index = -1;
3730 noop_p = true;
3735 return true;
3738 typedef hash_map <vec <gimple *>, slp_tree,
3739 simple_hashmap_traits <bst_traits, slp_tree> >
3740 scalar_stmts_to_slp_tree_map_t;
3742 /* Vectorize the SLP instance tree in postorder. */
3744 static bool
3745 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3746 scalar_stmts_to_slp_tree_map_t *bst_map)
3748 gimple *stmt;
3749 bool grouped_store, is_store;
3750 gimple_stmt_iterator si;
3751 stmt_vec_info stmt_info;
3752 unsigned int group_size;
3753 tree vectype;
3754 int i, j;
3755 slp_tree child;
3757 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3758 return false;
3760 /* See if we have already vectorized the same set of stmts and reuse their
3761 vectorized stmts. */
3762 slp_tree &leader
3763 = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
3764 if (leader)
3766 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
3767 return false;
3770 leader = node;
3771   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3772     vect_schedule_slp_instance (child, instance, bst_map);
3773
3774   /* Push SLP node def-type to stmts.  */
3775   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3776     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3777       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3778         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3779
3780   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3781   stmt_info = vinfo_for_stmt (stmt);
3782
3783   /* VECTYPE is the type of the destination.  */
3784   vectype = STMT_VINFO_VECTYPE (stmt_info);
3785   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3786
3787   if (!SLP_TREE_VEC_STMTS (node).exists ())
3788     SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3789
3790   if (dump_enabled_p ())
3791     {
3792       dump_printf_loc (MSG_NOTE, vect_location,
3793                        "------>vectorizing SLP node starting from: ");
3794       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3795     }
3796
3797   /* Vectorized stmts go before the last scalar stmt which is where
3798      all uses are ready.  */
3799   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3800
3801   /* Mark the first element of the reduction chain as reduction to properly
3802      transform the node.  In the analysis phase only the last element of the
3803      chain is marked as reduction.  */
3804   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3805       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3806     {
3807       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3808       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3809     }
3810
3811   /* Handle two-operation SLP nodes by vectorizing the group with
3812      both operations and then performing a merge.  */
3813   if (SLP_TREE_TWO_OPERATORS (node))
3814     {
3815       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3816       enum tree_code ocode = ERROR_MARK;
3817       gimple *ostmt;
3818       vec_perm_builder mask (group_size, group_size, 1);
3819       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3820         if (gimple_assign_rhs_code (ostmt) != code0)
3821           {
3822             mask.quick_push (1);
3823             ocode = gimple_assign_rhs_code (ostmt);
3824           }
3825         else
3826           mask.quick_push (0);
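      /* E.g. for a group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } code0
         is PLUS_EXPR, each MINUS_EXPR stmt pushes a 1, and we end up
         with mask == { 0, 1, 0, 1 } and ocode == MINUS_EXPR.  */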
3827       if (ocode != ERROR_MARK)
3828         {
3829           vec<gimple *> v0;
3830           vec<gimple *> v1;
3831           unsigned j;
3832           tree tmask = NULL_TREE;
3833           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3834           v0 = SLP_TREE_VEC_STMTS (node).copy ();
3835           SLP_TREE_VEC_STMTS (node).truncate (0);
3836           gimple_assign_set_rhs_code (stmt, ocode);
3837           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3838           gimple_assign_set_rhs_code (stmt, code0);
3839           v1 = SLP_TREE_VEC_STMTS (node).copy ();
3840           SLP_TREE_VEC_STMTS (node).truncate (0);
3841           tree meltype = build_nonstandard_integer_type
3842             (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3843           tree mvectype = get_same_sized_vectype (meltype, vectype);
3844           unsigned k = 0, l;
3845           for (j = 0; j < v0.length (); ++j)
3846             {
3847               unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3848               tree_vector_builder melts (mvectype, nunits, 1);
3849               for (l = 0; l < nunits; ++l)
3850                 {
3851                   if (k >= group_size)
3852                     k = 0;
3853                   tree t = build_int_cst (meltype, mask[k++] * nunits + l);
3854                   melts.quick_push (t);
3855                 }
3856               tmask = melts.build ();
3857
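              /* Continuing the plus/minus example, with group_size == 4,
                 nunits == 4 and mask == { 0, 1, 0, 1 } this builds the
                 element selector { 0, 5, 2, 7 }: even lanes are taken
                 from v0[j] (the PLUS_EXPR vectors) and odd lanes from
                 v1[j] (the MINUS_EXPR vectors), the kind of lane merge
                 an addsub instruction performs.  */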
3858               /* ??? Not all targets support a VEC_PERM_EXPR with a
3859                  constant mask that would translate to a vec_merge RTX
3860                  (with their vec_perm_const_ok).  We can either not
3861                  vectorize in that case or let veclower do its job.
3862                  Unfortunately that isn't too great and at least for
3863                  plus/minus we'd eventually like to match targets'
3864                  vector addsub instructions.  */
3865               gimple *vstmt;
3866               vstmt = gimple_build_assign (make_ssa_name (vectype),
3867                                            VEC_PERM_EXPR,
3868                                            gimple_assign_lhs (v0[j]),
3869                                            gimple_assign_lhs (v1[j]), tmask);
3870               vect_finish_stmt_generation (stmt, vstmt, &si);
3871               SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3872             }
3873           v0.release ();
3874           v1.release ();
3875           return false;
3876         }
3877     }
3878   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3879
3880   /* Restore stmt def-types.  */
3881   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3882     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3883       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3884         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3885
3886   return is_store;
3887 }
3888
3889 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3890    For loop vectorization this is done in vectorizable_call, but for SLP
3891    it needs to be deferred until end of vect_schedule_slp, because multiple
3892    SLP instances may refer to the same scalar stmt.  */
3893
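/* For example, a vectorized call "x_1 = powf (a_2, b_3);" is rewritten
   to "x_1 = 0.0f;": the lhs keeps a valid SSA definition while the
   scalar stmt itself becomes trivially dead.  */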
3894 static void
3895 vect_remove_slp_scalar_calls (slp_tree node)
3896 {
3897   gimple *stmt, *new_stmt;
3898   gimple_stmt_iterator gsi;
3899   int i;
3900   slp_tree child;
3901   tree lhs;
3902   stmt_vec_info stmt_info;
3903
3904   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3905     return;
3906
3907   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3908     vect_remove_slp_scalar_calls (child);
3909
3910   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3911     {
3912       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3913         continue;
3914       stmt_info = vinfo_for_stmt (stmt);
3915       if (stmt_info == NULL
3916           || is_pattern_stmt_p (stmt_info)
3917           || !PURE_SLP_STMT (stmt_info))
3918         continue;
3919       lhs = gimple_call_lhs (stmt);
3920       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3921       set_vinfo_for_stmt (new_stmt, stmt_info);
3922       set_vinfo_for_stmt (stmt, NULL);
3923       STMT_VINFO_STMT (stmt_info) = new_stmt;
3924       gsi = gsi_for_stmt (stmt);
3925       gsi_replace (&gsi, new_stmt, false);
3926       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3927     }
3928 }
3929
3930 /* Generate vector code for all SLP instances in the loop/basic block.  */
3931
3932 bool
3933 vect_schedule_slp (vec_info *vinfo)
3934 {
3935   vec<slp_instance> slp_instances;
3936   slp_instance instance;
3937   unsigned int i;
3938   bool is_store = false;
3939
3940   slp_instances = vinfo->slp_instances;
3941   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3942     {
3943       /* Schedule the tree of INSTANCE.  */
3944       scalar_stmts_to_slp_tree_map_t *bst_map
3945         = new scalar_stmts_to_slp_tree_map_t ();
3946       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3947                                              instance, bst_map);
3948       delete bst_map;
3949       if (dump_enabled_p ())
3950         dump_printf_loc (MSG_NOTE, vect_location,
3951                          "vectorizing stmts using SLP.\n");
3952     }
3953
3954   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3955     {
3956       slp_tree root = SLP_INSTANCE_TREE (instance);
3957       gimple *store;
3958       unsigned int j;
3959       gimple_stmt_iterator gsi;
3960
3961       /* Remove scalar call stmts.  Do not do this for basic-block
3962          vectorization as not all uses may be vectorized.
3963          ??? Why should this be necessary?  DCE should be able to
3964          remove the stmts itself.
3965          ??? For BB vectorization we could just as well remove scalar
3966          stmts starting from the SLP tree root if they have no
3967          uses.  */
3968       if (is_a <loop_vec_info> (vinfo))
3969         vect_remove_slp_scalar_calls (root);
3970
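      /* The scalar stores of each vectorized group, by contrast, are
         removed explicitly below: walk the group from the SLP tree
         root, unlink each store's virtual operands and release its
         stmt_vec_info.  */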
3971       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3972                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3973         {
3974           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3975             break;
3976
3977           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3978             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3979           /* Free the attached stmt_vec_info and remove the stmt.  */
3980           gsi = gsi_for_stmt (store);
3981           unlink_stmt_vdef (store);
3982           gsi_remove (&gsi, true);
3983           release_defs (store);
3984           free_stmt_vec_info (store);
3985         }
3986     }
3987
3988   return is_store;