gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
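/* For example (a hypothetical group, for illustration only): for the
   COND_EXPR assignment a_i = b_i < 0 ? c_i : d_i, gimple_num_ops is 4
   (lhs plus three rhs operands), so NOPS above becomes 3 + 1 = 4: the
   extra slot allows the two operands of the embedded comparison to be
   tracked as separate SLP operands alongside the then/else arms.  */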
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
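
/* For example (hypothetical stmts, for illustration only): for the SLP
   group { a0 = b0 + c0;  a1 = b1 + c1; } the defs are collected
   column-wise, one slp_oprnd_info per operand position:

     oprnds_info[0]->def_stmts = { def (b0), def (b1) }
     oprnds_info[1]->def_stmts = { def (c0), def (c1) }

   Each such column is later built into one SLP child node.  */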
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
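
/* For example (a hypothetical chain, for illustration only): if the
   elements after FIRST_STMT carry GROUP_GAP values 1, 1 and 2, the
   function returns 0 for FIRST_STMT itself, then 1, 2 and 4 for the
   following elements, i.e. the element's position in the chain
   counted in scalar accesses, gaps included.  */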
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to a non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first
	 stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
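
      /* For example (illustration of the maps above): for the hypothetical
	 COND_EXPR a_i = b_i >= c_i ? x_i : y_i the operands are visited as
	   *swap == 0: b, c, x, y  (no change)
	   *swap == 1: c, b, x, y  (comparison operands swapped)
	   *swap == 2: b, c, y, x  (arms swapped; the comparison code is
				    inverted later, below)
	 so that they line up with the operands of the group's first stmt.  */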
      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}
      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}
      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  /* We must already have set a vector size by now.  */
	  gcc_checking_assert (maybe_ne (current_vector_size, 0U));
	  if (!current_vector_size.is_constant ())
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }
  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
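
/* For example (illustration only): with a group of four int stores and
   V4SI as the natural vector type, const_nunits is 4 <= group_size, so
   basic-block SLP is fine and *max_nunits becomes at least 4.  With
   only two stores in the group, filling a V4SI would require unrolling,
   which is impossible in a basic block, so the build fails instead.  */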
/* Verify whether the scalar stmts STMTS are isomorphic, do not require
   data permutation and are not of unsupported types of operation.
   Return true if they are, otherwise return false and indicate in
   *MATCHES which stmts are not isomorphic to the first one.  If
   MATCHES[0] is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, poly_uint64 *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;
  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}
      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);
      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }
	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}
      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;
  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      vec_perm_builder sel (count, count, 1);
      for (i = 0; i < count; ++i)
	{
	  unsigned int elt = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    elt += count;
	  sel.quick_push (elt);
	}
      vec_perm_indices indices (sel, 2, count);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
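
/* For example (a hypothetical group, for illustration only): for
     { a0 = b0 + c0;  a1 = b1 - c1;  a2 = b2 + c2;  a3 = b3 - c3; }
   with V4SI, first_stmt_code is PLUS_EXPR and alt_stmt_code MINUS_EXPR.
   Both variants are computed as full vectors and the loop above builds
   the blend mask { 0, 5, 2, 7 }: lanes 0 and 2 come from the PLUS
   vector, lanes 1 and 3 (indices 4 + 1 and 4 + 3) from the MINUS
   vector.  The node is only accepted if the target supports that
   constant permute.  */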
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;
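
/* Usage sketch (hypothetical stmts S0/S1, for illustration only):
   assuming their gimple_uid values are already assigned, a failed
   group is recorded and re-queried like so:

     vec<gimple *> key = vNULL;
     key.safe_push (s0);
     key.safe_push (s1);
     bst_fail->add (key);			  // hashes uid(s0), uid(s1)
     gcc_checking_assert (bst_fail->contains (key));

   vect_build_slp_tree below performs exactly this bookkeeping.  */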
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When the SLP build fails for STMTS record this; otherwise the SLP
     build can be exponential in time when we are allowed to construct
     parts from scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree for the stmt group STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require a
   data permutation, or are of unsupported types of operation.
   Otherwise, return the built SLP tree, recording in *MATCHES which
   stmts (if any) failed to match the first one.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;
  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }
  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }
  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }
  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;
  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If all children of this child were built up from scalars then
	     just throw that away and build this node up from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}
      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}
      /* If the SLP build for operand zero failed and operand zero
	 and one can be commuted try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of unsuccessful permutes was no more
	     than a cut-off, as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }
	      /* If all children of this child were built up from scalars
		 then just throw that away and build this node up from
		 scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }
  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
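
/* For example (illustration only): PERMUTATION[i] is the destination
   slot of stmt I, so for scalar stmts { s0, s1, s2, s3 } and
   permutation { 2, 0, 3, 1 } we get tmp_stmts[2] = s0, tmp_stmts[0]
   = s1, tmp_stmts[3] = s2, tmp_stmts[1] = s3, i.e. the node ends up
   with { s1, s3, s0, s2 }.  */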
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last store in SLP INSTANCE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost,
			 scalar_stmts_set_t* visited)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* If we already costed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
    return;

  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost, visited);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned assumed_nunits
		= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
		 redundancies as well as loads that are later dead.  */
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % assumed_nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + assumed_nunits - 1) / assumed_nunits);
	      poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
	      ncopies_for_cost *= estimated_poly_value (uf);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
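
/* For example (illustration of the permuted-load costing above): with
   GROUP_SIZE 8, assumed_nunits 4 and load permutation { 0, 2, 1, 3 }
   only bits 0-3 of PERM are set, so the element scan sees live loads
   in the first vector only and ncopies_for_cost ends up 1: the second
   vector of the group is never used and is not costed.  */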
1858 /* Compute the cost for the SLP instance INSTANCE. */
1860 static void
1861 vect_analyze_slp_cost (slp_instance instance, void *data)
1863 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1864 unsigned ncopies_for_cost;
1865 stmt_info_for_cost *si;
1866 unsigned i;
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_NOTE, vect_location,
1870 "=== vect_analyze_slp_cost ===\n");
1872 /* Calculate the number of vector stmts to create based on the unrolling
1873 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1874 GROUP_SIZE / NUNITS otherwise. */
1875 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1876 slp_tree node = SLP_INSTANCE_TREE (instance);
1877 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1878 /* Get the estimated vectorization factor, which is always one for
1879 basic-block vectorization. */
1880 unsigned int assumed_vf;
1881 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1882 assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
1883 else
1884 assumed_vf = 1;
1885 /* For reductions look at a reduction operand in case the reduction
1886 operation is widening like DOT_PROD or SAD. */
1887 tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
1888 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1890 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1891 switch (gimple_assign_rhs_code (stmt))
1893 case DOT_PROD_EXPR:
1894 case SAD_EXPR:
1895 vectype_for_cost = get_vectype_for_scalar_type
1896 (TREE_TYPE (gimple_assign_rhs1 (stmt)));
1897 break;
1898 default:;
1901 unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
1902 ncopies_for_cost = (least_common_multiple (assumed_nunits,
1903 group_size * assumed_vf)
1904 / assumed_nunits);
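/* E.g., with GROUP_SIZE = 6, ASSUMED_VF = 2 and ASSUMED_NUNITS = 4 the
   group covers 12 scalars per unrolled iteration and
   least_common_multiple (4, 12) / 4 = 3 vector stmts are costed; with
   GROUP_SIZE = 3, ASSUMED_VF = 1 and ASSUMED_NUNITS = 4 the LCM is 12,
   so 3 vector stmts cover four copies of the group.  */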
1906 prologue_cost_vec.create (10);
1907 body_cost_vec.create (10);
1908 scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
1909 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1910 &prologue_cost_vec, &body_cost_vec,
1911 ncopies_for_cost, visited);
1912 delete visited;
1914 /* Record the prologue costs, which were delayed until we were
1915 sure that SLP was successful. */
1916 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1918 struct _stmt_vec_info *stmt_info
1919 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1920 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1921 si->misalign, vect_prologue);
1924 /* Record the instance's instructions in the target cost model. */
1925 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1927 struct _stmt_vec_info *stmt_info
1928 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1929 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1930 si->misalign, vect_body);
1933 prologue_cost_vec.release ();
1934 body_cost_vec.release ();
1937 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1938 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1939 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1940 containing the remainder.
1941 Return the first stmt in the second group. */
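/* E.g., splitting a group of 8 consecutive stores with GROUP1_SIZE = 4 and
   an original GROUP_GAP of 0: stores 0-3 stay in the first group and store
   4 becomes the head of the second group.  The second group's GROUP_GAP is
   set to 0 + GROUP1_SIZE = 4 (it skips over the first group) and the first
   group's GROUP_GAP grows by GROUP2_SIZE to 4 (it skips over the second
   group).  */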
1943 static gimple *
1944 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1946 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1947 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1948 gcc_assert (group1_size > 0);
1949 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1950 gcc_assert (group2_size > 0);
1951 GROUP_SIZE (first_vinfo) = group1_size;
1953 gimple *stmt = first_stmt;
1954 for (unsigned i = group1_size; i > 1; i--)
1956 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1957 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1959 /* STMT is now the last element of the first group. */
1960 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1961 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1963 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1964 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1966 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1967 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1970 /* For the second group, the GROUP_GAP is that before the original group,
1971 plus skipping over the first vector. */
1972 GROUP_GAP (vinfo_for_stmt (group2)) =
1973 GROUP_GAP (first_vinfo) + group1_size;
1975 /* GROUP_GAP of the first group now has to skip over the second group too. */
1976 GROUP_GAP (first_vinfo) += group2_size;
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1980 group1_size, group2_size);
1982 return group2;
1985 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1986 statements and a vector of NUNITS elements. */
1988 static poly_uint64
1989 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1991 return exact_div (common_multiple (nunits, group_size), group_size);
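/* E.g., for NUNITS = 4 and GROUP_SIZE = 6 the common multiple is 12, so the
   unrolling factor is 12 / 6 = 2: two copies of the group fill exactly
   three vectors.  When GROUP_SIZE is a multiple of NUNITS the factor is 1
   (e.g. GROUP_SIZE = 8, NUNITS = 4) and no unrolling is needed.  */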
1994 /* Analyze an SLP instance starting from a group of grouped stores. Call
1995 vect_build_slp_tree to build a tree of packed stmts if possible.
1996 Return FALSE if it's impossible to SLP any stmt in the loop. */
1998 static bool
1999 vect_analyze_slp_instance (vec_info *vinfo,
2000 gimple *stmt, unsigned max_tree_size)
2002 slp_instance new_instance;
2003 slp_tree node;
2004 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
2005 tree vectype, scalar_type = NULL_TREE;
2006 gimple *next;
2007 unsigned int i;
2008 vec<slp_tree> loads;
2009 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2010 vec<gimple *> scalar_stmts;
2012 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2014 if (dr)
2016 scalar_type = TREE_TYPE (DR_REF (dr));
2017 vectype = get_vectype_for_scalar_type (scalar_type);
2019 else
2021 gcc_assert (is_a <loop_vec_info> (vinfo));
2022 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2025 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
2027 else
2029 gcc_assert (is_a <loop_vec_info> (vinfo));
2030 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2031 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2034 if (!vectype)
2036 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2039 "Build SLP failed: unsupported data-type ");
2040 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
2041 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2044 return false;
2046 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2048 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2049 scalar_stmts.create (group_size);
2050 next = stmt;
2051 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2053 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2054 while (next)
2056 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2057 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2058 scalar_stmts.safe_push (
2059 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2060 else
2061 scalar_stmts.safe_push (next);
2062 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2064 /* Mark the first element of the reduction chain as reduction to properly
2065 transform the node. In the reduction analysis phase only the last
2066 element of the chain is marked as reduction. */
2067 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2068 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2070 else
2072 /* Collect reduction statements. */
2073 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2074 for (i = 0; reductions.iterate (i, &next); i++)
2075 scalar_stmts.safe_push (next);
2078 loads.create (group_size);
2080 /* Build the tree for the SLP instance. */
2081 bool *matches = XALLOCAVEC (bool, group_size);
2082 unsigned npermutes = 0;
2083 bst_fail = new scalar_stmts_set_t ();
2084 poly_uint64 max_nunits = nunits;
2085 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2086 &max_nunits, &loads, matches, &npermutes,
2087 NULL, max_tree_size);
2088 delete bst_fail;
2089 if (node != NULL)
2091 /* Calculate the unrolling factor based on the smallest type. */
2092 poly_uint64 unrolling_factor
2093 = calculate_unrolling_factor (max_nunits, group_size);
2095 if (maybe_ne (unrolling_factor, 1U)
2096 && is_a <bb_vec_info> (vinfo))
2098 unsigned HOST_WIDE_INT const_max_nunits;
2099 if (!max_nunits.is_constant (&const_max_nunits)
2100 || const_max_nunits > group_size)
2102 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2104 "Build SLP failed: store group "
2105 "size not a multiple of the vector size "
2106 "in basic block SLP\n");
2107 vect_free_slp_tree (node);
2108 loads.release ();
2109 return false;
2111 /* Fatal mismatch. */
2112 matches[group_size / const_max_nunits * const_max_nunits] = false;
2113 vect_free_slp_tree (node);
2114 loads.release ();
2116 else
2118 /* Create a new SLP instance. */
2119 new_instance = XNEW (struct _slp_instance);
2120 SLP_INSTANCE_TREE (new_instance) = node;
2121 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2122 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2123 SLP_INSTANCE_LOADS (new_instance) = loads;
2125 /* Compute the load permutation. */
2126 slp_tree load_node;
2127 bool loads_permuted = false;
2128 FOR_EACH_VEC_ELT (loads, i, load_node)
2130 vec<unsigned> load_permutation;
2131 int j;
2132 gimple *load, *first_stmt;
2133 bool this_load_permuted = false;
2134 load_permutation.create (group_size);
2135 first_stmt = GROUP_FIRST_ELEMENT
2136 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2137 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2139 int load_place = vect_get_place_in_interleaving_chain
2140 (load, first_stmt);
2141 gcc_assert (load_place != -1);
2142 if (load_place != j)
2143 this_load_permuted = true;
2144 load_permutation.safe_push (load_place);
2146 if (!this_load_permuted
2147 /* The load requires permutation when unrolling exposes
2148 a gap either because the group is larger than the SLP
2149 group-size or because there is a gap between the groups. */
2150 && (known_eq (unrolling_factor, 1U)
2151 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2152 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2154 load_permutation.release ();
2155 continue;
2157 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2158 loads_permuted = true;
2161 if (loads_permuted)
2163 if (!vect_supported_load_permutation_p (new_instance))
2165 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 "Build SLP failed: unsupported load "
2169 "permutation ");
2170 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2171 TDF_SLIM, stmt, 0);
2173 vect_free_slp_instance (new_instance);
2174 return false;
2178 /* If the loads and stores can be handled with load/store-lanes
2179 instructions do not generate this SLP instance. */
2180 if (is_a <loop_vec_info> (vinfo)
2181 && loads_permuted
2182 && dr && vect_store_lanes_supported (vectype, group_size))
2184 slp_tree load_node;
2185 FOR_EACH_VEC_ELT (loads, i, load_node)
2187 gimple *first_stmt = GROUP_FIRST_ELEMENT
2188 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2189 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2190 /* Use SLP for strided accesses (or if we
2191 can't load-lanes). */
2192 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2193 || ! vect_load_lanes_supported
2194 (STMT_VINFO_VECTYPE (stmt_vinfo),
2195 GROUP_SIZE (stmt_vinfo)))
2196 break;
2198 if (i == loads.length ())
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202 "Built SLP cancelled: can use "
2203 "load/store-lanes\n");
2204 vect_free_slp_instance (new_instance);
2205 return false;
2209 vinfo->slp_instances.safe_push (new_instance);
2211 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_NOTE, vect_location,
2214 "Final SLP tree for instance:\n");
2215 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2218 return true;
2221 else
2223 /* Failed to SLP. */
2224 /* Free the allocated memory. */
2225 scalar_stmts.release ();
2226 loads.release ();
2229 /* For basic block SLP, try to break the group up into multiples of the
2230 vector size. */
2231 unsigned HOST_WIDE_INT const_nunits;
2232 if (is_a <bb_vec_info> (vinfo)
2233 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2234 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2235 && nunits.is_constant (&const_nunits))
2237 /* We consider breaking the group only on VF boundaries from the existing
2238 start. */
2239 for (i = 0; i < group_size; i++)
2240 if (!matches[i]) break;
2242 if (i >= const_nunits && i < group_size)
2244 /* Split into two groups at the first vector boundary before i. */
2245 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2246 unsigned group1_size = i & ~(const_nunits - 1);
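/* Since CONST_NUNITS is a power of two (asserted above), the masking rounds
   I down to a multiple of the vector size: e.g. with CONST_NUNITS == 4 and
   the first mismatch at I == 6, GROUP1_SIZE becomes 4, so the first split
   group covers exactly one full vector.  */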
2248 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2249 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2250 /* If the first non-match was in the middle of a vector,
2251 skip the rest of that vector. */
2252 if (group1_size < i)
2254 i = group1_size + const_nunits;
2255 if (i < group_size)
2256 rest = vect_split_slp_store_group (rest, const_nunits);
2258 if (i < group_size)
2259 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2260 return res;
2262 /* Even though the first vector did not all match, we might be able to SLP
2263 (some) of the remainder. FORNOW ignore this possibility. */
2266 return false;
2270 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2271 trees of packed scalar stmts if SLP is possible. */
2273 bool
2274 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2276 unsigned int i;
2277 gimple *first_element;
2279 if (dump_enabled_p ())
2280 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2282 /* Find SLP sequences starting from groups of grouped stores. */
2283 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2284 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2286 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2288 if (loop_vinfo->reduction_chains.length () > 0)
2290 /* Find SLP sequences starting from reduction chains. */
2291 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2292 if (! vect_analyze_slp_instance (vinfo, first_element,
2293 max_tree_size))
2295 /* Dissolve reduction chain group. */
2296 gimple *next, *stmt = first_element;
2297 while (stmt)
2299 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2300 next = GROUP_NEXT_ELEMENT (vinfo);
2301 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2302 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2303 stmt = next;
2305 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2306 = vect_internal_def;
2310 /* Find SLP sequences starting from groups of reductions. */
2311 if (loop_vinfo->reductions.length () > 1)
2312 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2313 max_tree_size);
2316 return true;
2320 /* For each possible SLP instance decide whether to SLP it and calculate overall
2321 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2322 least one instance. */
2324 bool
2325 vect_make_slp_decision (loop_vec_info loop_vinfo)
2327 unsigned int i;
2328 poly_uint64 unrolling_factor = 1;
2329 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2330 slp_instance instance;
2331 int decided_to_slp = 0;
2333 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2335 "\n");
2337 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2339 /* FORNOW: SLP if you can. */
2340 /* All unroll factors have the form current_vector_size * X for some
2341 rational X, so they must have a common multiple. */
2342 unrolling_factor
2343 = force_common_multiple (unrolling_factor,
2344 SLP_INSTANCE_UNROLLING_FACTOR (instance));
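/* E.g., combining instances with unrolling factors 2 and 4 yields an
   overall factor of 4; for compile-time-constant factors
   force_common_multiple computes the least common multiple.  */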
2346 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2347 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2348 loop-based vectorization. Such stmts will be marked as HYBRID. */
2349 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2350 decided_to_slp++;
2353 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2355 if (decided_to_slp && dump_enabled_p ())
2357 dump_printf_loc (MSG_NOTE, vect_location,
2358 "Decided to SLP %d instances. Unrolling factor ",
2359 decided_to_slp);
2360 dump_dec (MSG_NOTE, unrolling_factor);
2361 dump_printf (MSG_NOTE, "\n");
2364 return (decided_to_slp > 0);
2368 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2369 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2371 static void
2372 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2374 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2375 imm_use_iterator imm_iter;
2376 gimple *use_stmt;
2377 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2378 slp_tree child;
2379 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2380 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2381 int j;
2383 /* Propagate hybrid down the SLP tree. */
2384 if (stype == hybrid)
2386 else if (HYBRID_SLP_STMT (stmt_vinfo))
2387 stype = hybrid;
2388 else
2390 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2391 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2392 /* If we get a pattern stmt here we have to use the LHS of the
2393 original stmt for immediate uses. */
2394 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2395 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2396 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2397 tree def;
2398 if (gimple_code (stmt) == GIMPLE_PHI)
2399 def = gimple_phi_result (stmt);
2400 else
2401 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2402 if (def)
2403 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2405 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2406 continue;
2407 use_vinfo = vinfo_for_stmt (use_stmt);
2408 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2409 && STMT_VINFO_RELATED_STMT (use_vinfo))
2410 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2411 if (!STMT_SLP_TYPE (use_vinfo)
2412 && (STMT_VINFO_RELEVANT (use_vinfo)
2413 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2414 && !(gimple_code (use_stmt) == GIMPLE_PHI
2415 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2417 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2420 "def in non-SLP stmt: ");
2421 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2423 stype = hybrid;
2428 if (stype == hybrid
2429 && !HYBRID_SLP_STMT (stmt_vinfo))
2431 if (dump_enabled_p ())
2433 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2434 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2436 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2439 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2440 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2441 vect_detect_hybrid_slp_stmts (child, i, stype);
2444 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2446 static tree
2447 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2449 walk_stmt_info *wi = (walk_stmt_info *)data;
2450 struct loop *loopp = (struct loop *)wi->info;
2452 if (wi->is_lhs)
2453 return NULL_TREE;
2455 if (TREE_CODE (*tp) == SSA_NAME
2456 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2458 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2459 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2460 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2462 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2465 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2467 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2471 return NULL_TREE;
2474 static tree
2475 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2476 walk_stmt_info *)
2478 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2479 /* If the stmt is in a SLP instance then this isn't a reason
2480 to mark use definitions in other SLP instances as hybrid. */
2481 if (! STMT_SLP_TYPE (use_vinfo)
2482 && (STMT_VINFO_RELEVANT (use_vinfo)
2483 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2484 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2485 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2487 else
2488 *handled = true;
2489 return NULL_TREE;
2492 /* Find stmts that must be both vectorized and SLPed. */
2494 void
2495 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2497 unsigned int i;
2498 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2499 slp_instance instance;
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2503 "\n");
2505 /* First walk all pattern stmts in the loop and mark defs of uses as
2506 hybrid because immediate uses in them are not recorded. */
2507 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2509 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2510 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2511 gsi_next (&gsi))
2513 gimple *stmt = gsi_stmt (gsi);
2514 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2515 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2517 walk_stmt_info wi;
2518 memset (&wi, 0, sizeof (wi));
2519 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2520 gimple_stmt_iterator gsi2
2521 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2522 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2523 vect_detect_hybrid_slp_1, &wi);
2524 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2525 vect_detect_hybrid_slp_2,
2526 vect_detect_hybrid_slp_1, &wi);
2531 /* Then walk the SLP instance trees marking stmts with uses in
2532 non-SLP stmts as hybrid, also propagating hybrid down the
2533 SLP tree, collecting the above info on-the-fly. */
2534 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2536 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2537 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2538 i, pure_slp);
2543 /* Initialize a bb_vec_info struct for the statements between
2544 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2546 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2547 gimple_stmt_iterator region_end_in)
2548 : vec_info (vec_info::bb, init_cost (NULL)),
2549 bb (gsi_bb (region_begin_in)),
2550 region_begin (region_begin_in),
2551 region_end (region_end_in)
2553 gimple_stmt_iterator gsi;
2555 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2556 gsi_next (&gsi))
2558 gimple *stmt = gsi_stmt (gsi);
2559 gimple_set_uid (stmt, 0);
2560 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2563 bb->aux = this;
2567 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2568 stmts in the basic block. */
2570 _bb_vec_info::~_bb_vec_info ()
2572 for (gimple_stmt_iterator si = region_begin;
2573 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2575 gimple *stmt = gsi_stmt (si);
2576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2578 if (stmt_info)
2579 /* Free stmt_vec_info. */
2580 free_stmt_vec_info (stmt);
2582 /* Reset region marker. */
2583 gimple_set_uid (stmt, -1);
2586 bb->aux = NULL;
2590 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2591 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2593 Return true if the operations are supported. */
2595 static bool
2596 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2597 slp_instance node_instance)
2599 bool dummy;
2600 int i, j;
2601 gimple *stmt;
2602 slp_tree child;
2604 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2605 return true;
2607 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2608 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2609 return false;
2611 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2612 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2613 gcc_assert (stmt_info);
2614 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2616 /* For BB vectorization vector types are assigned here.
2617 Memory accesses already got their vector type assigned
2618 in vect_analyze_data_refs. */
2619 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2620 if (bb_vinfo
2621 && ! STMT_VINFO_DATA_REF (stmt_info))
2623 gcc_assert (PURE_SLP_STMT (stmt_info));
2625 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2626 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_NOTE, vect_location,
2629 "get vectype for scalar type: ");
2630 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2631 dump_printf (MSG_NOTE, "\n");
2634 tree vectype = get_vectype_for_scalar_type (scalar_type);
2635 if (!vectype)
2637 if (dump_enabled_p ())
2639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2640 "not SLPed: unsupported data-type ");
2641 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2642 scalar_type);
2643 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2645 return false;
2648 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2651 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2652 dump_printf (MSG_NOTE, "\n");
2655 gimple *sstmt;
2656 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2657 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2660 /* Calculate the number of vector statements to be created for the
2661 scalar stmts in this node. For SLP reductions it is equal to the
2662 number of vector statements in the children (which has already been
2663 calculated by the recursive call). Otherwise it is the number of
2664 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2665 VF divided by the number of elements in a vector. */
2666 if (GROUP_FIRST_ELEMENT (stmt_info)
2667 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2668 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2669 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2670 else
2672 poly_uint64 vf;
2673 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2674 vf = loop_vinfo->vectorization_factor;
2675 else
2676 vf = 1;
2677 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2678 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2679 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2680 = vect_get_num_vectors (vf * group_size, vectype);
2683 /* Push SLP node def-type to stmt operands. */
2684 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2685 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2686 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2687 = SLP_TREE_DEF_TYPE (child);
2688 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2689 /* Restore def-types. */
2690 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2691 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2692 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2693 = vect_internal_def;
2694 if (! res)
2695 return false;
2697 return true;
2701 /* Analyze statements in SLP instances of VINFO. Return true if the
2702 operations are supported. */
2704 bool
2705 vect_slp_analyze_operations (vec_info *vinfo)
2707 slp_instance instance;
2708 int i;
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_NOTE, vect_location,
2712 "=== vect_slp_analyze_operations ===\n");
2714 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2716 if (!vect_slp_analyze_node_operations (vinfo,
2717 SLP_INSTANCE_TREE (instance),
2718 instance))
2720 dump_printf_loc (MSG_NOTE, vect_location,
2721 "removing SLP instance operations starting from: ");
2722 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2723 SLP_TREE_SCALAR_STMTS
2724 (SLP_INSTANCE_TREE (instance))[0], 0);
2725 vect_free_slp_instance (instance);
2726 vinfo->slp_instances.ordered_remove (i);
2728 else
2730 /* Compute the costs of the SLP instance. */
2731 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2732 i++;
2736 return !vinfo->slp_instances.is_empty ();
2740 /* Compute the scalar cost of the SLP node NODE and its children
2741 and return it. Do not account defs that are marked in LIFE and
2742 update LIFE according to uses of NODE. */
2744 static unsigned
2745 vect_bb_slp_scalar_cost (basic_block bb,
2746 slp_tree node, vec<bool, va_heap> *life)
2748 unsigned scalar_cost = 0;
2749 unsigned i;
2750 gimple *stmt;
2751 slp_tree child;
2753 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2755 unsigned stmt_cost;
2756 ssa_op_iter op_iter;
2757 def_operand_p def_p;
2758 stmt_vec_info stmt_info;
2760 if ((*life)[i])
2761 continue;
2763 /* If there is a non-vectorized use of the defs then the scalar
2764 stmt is kept live in which case we do not account it or any
2765 required defs in the SLP children in the scalar cost. This
2766 way we make the vectorization more costly when compared to
2767 the scalar cost. */
2768 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2770 imm_use_iterator use_iter;
2771 gimple *use_stmt;
2772 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2773 if (!is_gimple_debug (use_stmt)
2774 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2775 use_stmt)
2776 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2778 (*life)[i] = true;
2779 BREAK_FROM_IMM_USE_STMT (use_iter);
2782 if ((*life)[i])
2783 continue;
2785 /* Count scalar stmts only once. */
2786 if (gimple_visited_p (stmt))
2787 continue;
2788 gimple_set_visited (stmt, true);
2790 stmt_info = vinfo_for_stmt (stmt);
2791 if (STMT_VINFO_DATA_REF (stmt_info))
2793 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2794 stmt_cost = vect_get_stmt_cost (scalar_load);
2795 else
2796 stmt_cost = vect_get_stmt_cost (scalar_store);
2798 else
2799 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2801 scalar_cost += stmt_cost;
2804 auto_vec<bool, 20> subtree_life;
2805 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2807 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2809 /* Do not directly pass LIFE to the recursive call, copy it to
2810 confine changes in the callee to the current child/subtree. */
2811 subtree_life.safe_splice (*life);
2812 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2813 subtree_life.truncate (0);
2817 return scalar_cost;
2820 /* Check if vectorization of the basic block is profitable. */
2822 static bool
2823 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2825 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2826 slp_instance instance;
2827 int i;
2828 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2829 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2831 /* Calculate scalar cost. */
2832 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2834 auto_vec<bool, 20> life;
2835 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2836 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2837 SLP_INSTANCE_TREE (instance),
2838 &life);
2841 /* Unset visited flag. */
2842 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2843 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2844 gimple_set_visited (gsi_stmt (gsi), false);
2846 /* Complete the target-specific cost calculation. */
2847 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2848 &vec_inside_cost, &vec_epilogue_cost);
2850 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2852 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2855 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2856 vec_inside_cost);
2857 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2858 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2859 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2862 /* Vectorization is profitable if its cost is no more than the cost of the
2863 scalar version. Note that we err on the vector side for equal cost because
2864 the cost estimate is otherwise quite pessimistic (constant uses are
2865 free on the scalar side but cost a load on the vector side for
2866 example). */
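/* E.g., with a vector prologue cost of 4, an inside cost of 20 and an
   epilogue cost of 0 against a scalar cost of 24, the comparison
   24 > 24 is false and the block is vectorized; a scalar cost of 23
   would make it fail.  */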
2867 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2868 return false;
2870 return true;
2873 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2874 if so and sets fatal to true if failure is independent of
2875 current_vector_size. */
2877 static bb_vec_info
2878 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2879 gimple_stmt_iterator region_end,
2880 vec<data_reference_p> datarefs, int n_stmts,
2881 bool &fatal)
2883 bb_vec_info bb_vinfo;
2884 slp_instance instance;
2885 int i;
2886 poly_uint64 min_vf = 2;
2888 /* The first group of checks is independent of the vector size. */
2889 fatal = true;
2891 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2893 if (dump_enabled_p ())
2894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2895 "not vectorized: too many instructions in "
2896 "basic block.\n");
2897 free_data_refs (datarefs);
2898 return NULL;
2901 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2902 if (!bb_vinfo)
2903 return NULL;
2905 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2907 /* Analyze the data references. */
2909 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2911 if (dump_enabled_p ())
2912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2913 "not vectorized: unhandled data-ref in basic "
2914 "block.\n");
2916 delete bb_vinfo;
2917 return NULL;
2920 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2922 if (dump_enabled_p ())
2923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2924 "not vectorized: not enough data-refs in "
2925 "basic block.\n");
2927 delete bb_vinfo;
2928 return NULL;
2931 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2935 "not vectorized: unhandled data access in "
2936 "basic block.\n");
2938 delete bb_vinfo;
2939 return NULL;
2942 /* If there are no grouped stores in the region there is no need
2943 to continue with pattern recog as vect_analyze_slp will fail
2944 anyway. */
2945 if (bb_vinfo->grouped_stores.is_empty ())
2947 if (dump_enabled_p ())
2948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2949 "not vectorized: no grouped stores in "
2950 "basic block.\n");
2952 delete bb_vinfo;
2953 return NULL;
2956 /* The rest of the analysis below depends on the vector size in some way. */
2957 fatal = false;
2959 vect_pattern_recog (bb_vinfo);
2961 /* Check the SLP opportunities in the basic block, analyze and build SLP
2962 trees. */
2963 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2965 if (dump_enabled_p ())
2967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2968 "Failed to SLP the basic block.\n");
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2970 "not vectorized: failed to find SLP opportunities "
2971 "in basic block.\n");
2974 delete bb_vinfo;
2975 return NULL;
2978 vect_record_base_alignments (bb_vinfo);
2980 /* Analyze and verify the alignment of data references and the
2981 dependence in the SLP instances. */
2982 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2984 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2985 || ! vect_slp_analyze_instance_dependence (instance))
2987 dump_printf_loc (MSG_NOTE, vect_location,
2988 "removing SLP instance operations starting from: ");
2989 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2990 SLP_TREE_SCALAR_STMTS
2991 (SLP_INSTANCE_TREE (instance))[0], 0);
2992 vect_free_slp_instance (instance);
2993 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2994 continue;
2997 /* Mark all the statements that we want to vectorize as pure SLP and
2998 relevant. */
2999 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3000 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3002 i++;
3004 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3006 delete bb_vinfo;
3007 return NULL;
3010 if (!vect_slp_analyze_operations (bb_vinfo))
3012 if (dump_enabled_p ())
3013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3014 "not vectorized: bad operation in basic block.\n");
3016 delete bb_vinfo;
3017 return NULL;
3020 /* Cost model: check if the vectorization is worthwhile. */
3021 if (!unlimited_cost_model (NULL)
3022 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3024 if (dump_enabled_p ())
3025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3026 "not vectorized: vectorization is not "
3027 "profitable.\n");
3029 delete bb_vinfo;
3030 return NULL;
3033 if (dump_enabled_p ())
3034 dump_printf_loc (MSG_NOTE, vect_location,
3035 "Basic block will be vectorized using SLP\n");
3037 return bb_vinfo;
3041 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3042 true if anything in the basic-block was vectorized. */
3044 bool
3045 vect_slp_bb (basic_block bb)
3047 bb_vec_info bb_vinfo;
3048 gimple_stmt_iterator gsi;
3049 bool any_vectorized = false;
3050 auto_vector_sizes vector_sizes;
3052 if (dump_enabled_p ())
3053 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
3055 /* Autodetect the first vector size we try. */
3056 current_vector_size = 0;
3057 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
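/* Which sizes are reported is entirely target-specific; e.g. an x86_64
   target with AVX2 enabled would typically report 32- and 16-byte vector
   sizes here, so a region that fails to vectorize with 32-byte vectors is
   re-analyzed with 16-byte ones below.  */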
3058 unsigned int next_size = 0;
3060 gsi = gsi_start_bb (bb);
3062 poly_uint64 autodetected_vector_size = 0;
3063 while (1)
3065 if (gsi_end_p (gsi))
3066 break;
3068 gimple_stmt_iterator region_begin = gsi;
3069 vec<data_reference_p> datarefs = vNULL;
3070 int insns = 0;
3072 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3074 gimple *stmt = gsi_stmt (gsi);
3075 if (is_gimple_debug (stmt))
3076 continue;
3077 insns++;
3079 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3080 vect_location = gimple_location (stmt);
3082 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3083 break;
3086 /* Skip leading unhandled stmts. */
3087 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3089 gsi_next (&gsi);
3090 continue;
3093 gimple_stmt_iterator region_end = gsi;
3095 bool vectorized = false;
3096 bool fatal = false;
3097 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3098 datarefs, insns, fatal);
3099 if (bb_vinfo
3100 && dbg_cnt (vect_slp))
3102 if (dump_enabled_p ())
3103 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3105 vect_schedule_slp (bb_vinfo);
3107 if (dump_enabled_p ())
3108 dump_printf_loc (MSG_NOTE, vect_location,
3109 "basic block part vectorized\n");
3111 vectorized = true;
3113 delete bb_vinfo;
3115 any_vectorized |= vectorized;
3117 if (next_size == 0)
3118 autodetected_vector_size = current_vector_size;
3120 if (next_size < vector_sizes.length ()
3121 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3122 next_size += 1;
3124 if (vectorized
3125 || next_size == vector_sizes.length ()
3126 || known_eq (current_vector_size, 0U)
3127 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3128 vector sizes will fail do not bother iterating. */
3129 || fatal)
3131 if (gsi_end_p (region_end))
3132 break;
3134 /* Skip the unhandled stmt. */
3135 gsi_next (&gsi);
3137 /* And reset vector sizes. */
3138 current_vector_size = 0;
3139 next_size = 0;
3141 else
3143 /* Try the next biggest vector size. */
3144 current_vector_size = vector_sizes[next_size++];
3145 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_NOTE, vect_location,
3148 "***** Re-trying analysis with "
3149 "vector size ");
3150 dump_dec (MSG_NOTE, current_vector_size);
3151 dump_printf (MSG_NOTE, "\n");
3154 /* Start over. */
3155 gsi = region_begin;
3159 return any_vectorized;
3163 /* Return true if the vector type of the boolean constant that is the
3164 OPNUM operand of statement STMT is a boolean vector. */
3166 static bool
3167 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3169 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3170 enum tree_code code = gimple_expr_code (stmt);
3171 tree op, vectype;
3172 gimple *def_stmt;
3173 enum vect_def_type dt;
3175 /* For comparison and COND_EXPR type is chosen depending
3176 on the other comparison operand. */
3177 if (TREE_CODE_CLASS (code) == tcc_comparison)
3179 if (opnum)
3180 op = gimple_assign_rhs1 (stmt);
3181 else
3182 op = gimple_assign_rhs2 (stmt);
3184 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3185 &dt, &vectype))
3186 gcc_unreachable ();
3188 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3191 if (code == COND_EXPR)
3193 tree cond = gimple_assign_rhs1 (stmt);
3195 if (TREE_CODE (cond) == SSA_NAME)
3196 op = cond;
3197 else if (opnum)
3198 op = TREE_OPERAND (cond, 1);
3199 else
3200 op = TREE_OPERAND (cond, 0);
3202 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3203 &dt, &vectype))
3204 gcc_unreachable ();
3206 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3209 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3213 /* For constant and loop invariant defs of SLP_NODE this function returns
3214 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3215 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3216 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3220 static void
3221 vect_get_constant_vectors (tree op, slp_tree slp_node,
3222 vec<tree> *vec_oprnds,
3223 unsigned int op_num, unsigned int number_of_vectors)
3225 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3226 gimple *stmt = stmts[0];
3227 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3228 unsigned nunits;
3229 tree vec_cst;
3230 unsigned j, number_of_places_left_in_vector;
3231 tree vector_type;
3232 tree vop;
3233 int group_size = stmts.length ();
3234 unsigned int vec_num, i;
3235 unsigned number_of_copies = 1;
3236 vec<tree> voprnds;
3237 voprnds.create (number_of_vectors);
3238 bool constant_p, is_store;
3239 tree neutral_op = NULL;
3240 enum tree_code code = gimple_expr_code (stmt);
3241 gimple_seq ctor_seq = NULL;
3243 /* Check if vector type is a boolean vector. */
3244 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3245 && vect_mask_constant_operand_p (stmt, op_num))
3246 vector_type
3247 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3248 else
3249 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3250 /* Enforced by vect_get_and_check_slp_defs. */
3251 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3253 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3255 is_store = true;
3256 op = gimple_assign_rhs1 (stmt);
3258 else
3259 is_store = false;
3261 gcc_assert (op);
3263 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3264 created vectors. It is greater than 1 if unrolling is performed.
3266 For example, we have two scalar operands, s1 and s2 (e.g., group of
3267 strided accesses of size two), while NUNITS is four (i.e., four scalars
3268 of this type can be packed in a vector). The output vector will contain
3269 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3270 will be 2).
3272 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3273 containing the operands.
3275 For example, NUNITS is four as before, and the group size is 8
3276 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3277 {s5, s6, s7, s8}. */
3279 number_of_copies = nunits * number_of_vectors / group_size;
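/* E.g., matching the first example above, GROUP_SIZE = 2, NUNITS = 4 and
   NUMBER_OF_VECTORS = 1 give 4 * 1 / 2 = 2 copies; in the second example
   GROUP_SIZE = 8 with NUMBER_OF_VECTORS = 2 gives 4 * 2 / 8 = 1 copy.  */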
3281 number_of_places_left_in_vector = nunits;
3282 constant_p = true;
3283 tree_vector_builder elts (vector_type, nunits, 1);
3284 elts.quick_grow (nunits);
3285 bool place_after_defs = false;
3286 for (j = 0; j < number_of_copies; j++)
3288 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3290 if (is_store)
3291 op = gimple_assign_rhs1 (stmt);
3292 else
3294 switch (code)
3296 case COND_EXPR:
3298 tree cond = gimple_assign_rhs1 (stmt);
3299 if (TREE_CODE (cond) == SSA_NAME)
3300 op = gimple_op (stmt, op_num + 1);
3301 else if (op_num == 0 || op_num == 1)
3302 op = TREE_OPERAND (cond, op_num);
3303 else
3305 if (op_num == 2)
3306 op = gimple_assign_rhs2 (stmt);
3307 else
3308 op = gimple_assign_rhs3 (stmt);
3311 break;
3313 case CALL_EXPR:
3314 op = gimple_call_arg (stmt, op_num);
3315 break;
3317 case LSHIFT_EXPR:
3318 case RSHIFT_EXPR:
3319 case LROTATE_EXPR:
3320 case RROTATE_EXPR:
3321 op = gimple_op (stmt, op_num + 1);
3322 /* Unlike the other binary operators, shifts/rotates have
3323 the shift count being int, instead of the same type as
3324 the lhs, so make sure the scalar is the right type if
3325 we are dealing with vectors of
3326 long long/long/short/char. */
3327 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3328 op = fold_convert (TREE_TYPE (vector_type), op);
3329 break;
3331 default:
3332 op = gimple_op (stmt, op_num + 1);
3333 break;
3337 /* Create 'vect_ = {op0,op1,...,opn}'. */
3338 number_of_places_left_in_vector--;
3339 tree orig_op = op;
3340 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3342 if (CONSTANT_CLASS_P (op))
3344 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3346 /* Can't use VIEW_CONVERT_EXPR for booleans because
3347 of possibly different sizes of scalar value and
3348 vector element. */
3349 if (integer_zerop (op))
3350 op = build_int_cst (TREE_TYPE (vector_type), 0);
3351 else if (integer_onep (op))
3352 op = build_all_ones_cst (TREE_TYPE (vector_type));
3353 else
3354 gcc_unreachable ();
3356 else
3357 op = fold_unary (VIEW_CONVERT_EXPR,
3358 TREE_TYPE (vector_type), op);
3359 gcc_assert (op && CONSTANT_CLASS_P (op));
3361 else
3363 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3364 gimple *init_stmt;
3365 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3367 tree true_val
3368 = build_all_ones_cst (TREE_TYPE (vector_type));
3369 tree false_val
3370 = build_zero_cst (TREE_TYPE (vector_type));
3371 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3372 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3373 op, true_val,
3374 false_val);
3376 else
3378 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3379 op);
3380 init_stmt
3381 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3382 op);
3384 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3385 op = new_temp;
3388 elts[number_of_places_left_in_vector] = op;
3389 if (!CONSTANT_CLASS_P (op))
3390 constant_p = false;
3391 if (TREE_CODE (orig_op) == SSA_NAME
3392 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3393 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3394 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3395 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3396 place_after_defs = true;
3398 if (number_of_places_left_in_vector == 0)
3400 if (constant_p)
3401 vec_cst = elts.build ();
3402 else
3404 vec<constructor_elt, va_gc> *v;
3405 unsigned k;
3406 vec_alloc (v, nunits);
3407 for (k = 0; k < nunits; ++k)
3408 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3409 vec_cst = build_constructor (vector_type, v);
3411 tree init;
3412 gimple_stmt_iterator gsi;
3413 if (place_after_defs)
3415 gsi = gsi_for_stmt
3416 (vect_find_last_scalar_stmt_in_slp (slp_node));
3417 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3419 else
3420 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3421 if (ctor_seq != NULL)
3423 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3424 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3425 GSI_SAME_STMT);
3426 ctor_seq = NULL;
3428 voprnds.quick_push (init);
3429 place_after_defs = false;
3430 number_of_places_left_in_vector = nunits;
3431 constant_p = true;
3432 elts.new_vector (vector_type, nunits, 1);
3433 elts.quick_grow (nunits);
3438 /* Since the vectors are created in the reverse order, we have to reverse
3439 them here. */
3440 vec_num = voprnds.length ();
3441 for (j = vec_num; j != 0; j--)
3443 vop = voprnds[j - 1];
3444 vec_oprnds->quick_push (vop);
3447 voprnds.release ();
3449 /* If VF is greater than the unrolling factor needed for the SLP group of
3450 stmts, the NUMBER_OF_VECTORS to be created is greater than
3451 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3452 to replicate the vectors. */
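/* E.g., if two vectors {v0, v1} were built above but NUMBER_OF_VECTORS is
   4, the loop below pushes v0 and v1 again, yielding {v0, v1, v0, v1};
   when a neutral value is known for a reduction, the extra slots are
   instead filled with a splat of NEUTRAL_OP.  */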
3453 while (number_of_vectors > vec_oprnds->length ())
3455 tree neutral_vec = NULL;
3457 if (neutral_op)
3459 if (!neutral_vec)
3460 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3462 vec_oprnds->quick_push (neutral_vec);
3464 else
3466 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3467 vec_oprnds->quick_push (vop);
3473 /* Get vectorized definitions from SLP_NODE that contains corresponding
3474 vectorized def-stmts. */
3476 static void
3477 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3479 tree vec_oprnd;
3480 gimple *vec_def_stmt;
3481 unsigned int i;
3483 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3485 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3487 gcc_assert (vec_def_stmt);
3488 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3489 vec_oprnd = gimple_phi_result (vec_def_stmt);
3490 else
3491 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3492 vec_oprnds->quick_push (vec_oprnd);
3497 /* Get vectorized definitions for SLP_NODE.
3498 If the scalar definitions are loop invariants or constants, collect them and
3499 call vect_get_constant_vectors() to create vector stmts.
3500 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3501 must be stored in the corresponding child of SLP_NODE, and we call
3502 vect_get_slp_vect_defs () to retrieve them. */
3504 void
3505 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3506 vec<vec<tree> > *vec_oprnds)
3508 gimple *first_stmt;
3509 int number_of_vects = 0, i;
3510 unsigned int child_index = 0;
3511 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3512 slp_tree child = NULL;
3513 vec<tree> vec_defs;
3514 tree oprnd;
3515 bool vectorized_defs;
3517 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3518 FOR_EACH_VEC_ELT (ops, i, oprnd)
3520 /* For each operand we check if it has vectorized definitions in a child
3521 node or we need to create them (for invariants and constants). We
3522 check if the LHS of the first stmt of the next child matches OPRND.
3523 If it does, we found the correct child. Otherwise, we call
3524 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3525 to check this child node for the next operand. */
3526 vectorized_defs = false;
3527 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3529 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3531 /* We have to check both pattern and original def, if available. */
3532 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3534 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3535 gimple *related
3536 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3537 tree first_def_op;
3539 if (gimple_code (first_def) == GIMPLE_PHI)
3540 first_def_op = gimple_phi_result (first_def);
3541 else
3542 first_def_op = gimple_get_lhs (first_def);
3543 if (operand_equal_p (oprnd, first_def_op, 0)
3544 || (related
3545 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3547 /* The number of vector defs is determined by the number of
3548 vector statements in the node from which we get those
3549 statements. */
3550 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3551 vectorized_defs = true;
3552 child_index++;
3555 else
3556 child_index++;
3559 if (!vectorized_defs)
3561 if (i == 0)
3563 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3564 /* Number of vector stmts was calculated according to LHS in
3565 vect_schedule_slp_instance (), fix it by replacing LHS with
3566 RHS, if necessary. See vect_get_smallest_scalar_type () for
3567 details. */
3568 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3569 &rhs_size_unit);
3570 if (rhs_size_unit != lhs_size_unit)
3572 number_of_vects *= rhs_size_unit;
3573 number_of_vects /= lhs_size_unit;
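/* E.g., for a group of stores of chars whose stored values are computed
   from ints, LHS_SIZE_UNIT is 1 and RHS_SIZE_UNIT is 4: a char vector
   holds four times as many elements as an int vector, so four times as
   many operand vectors are needed.  */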
3578 /* Allocate memory for vectorized defs. */
3579 vec_defs = vNULL;
3580 vec_defs.create (number_of_vects);
3582 /* For reduction defs we call vect_get_constant_vectors (), since we are
3583 looking for initial loop invariant values. */
3584 if (vectorized_defs)
3585 /* The defs are already vectorized. */
3586 vect_get_slp_vect_defs (child, &vec_defs);
3587 else
3588 /* Build vectors from scalar defs. */
3589 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3590 number_of_vects);
3592 vec_oprnds->quick_push (vec_defs);
3596 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3597 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3598 permute statements for the SLP node NODE of the SLP instance
3599 SLP_NODE_INSTANCE. */
3601 bool
3602 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3603 gimple_stmt_iterator *gsi, poly_uint64 vf,
3604 slp_instance slp_node_instance, bool analyze_only,
3605 unsigned *n_perms)
3607 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3609 tree mask_element_type = NULL_TREE, mask_type;
3610 int nunits, vec_index = 0;
3611 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3612 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3613 int mask_element;
3614 machine_mode mode;
3615 unsigned HOST_WIDE_INT const_vf;
3617 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3618 return false;
3620 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3622 mode = TYPE_MODE (vectype);
3624 /* At the moment, all permutations are represented using per-element
3625 indices, so we can't cope with variable vectorization factors. */
3626 if (!vf.is_constant (&const_vf))
3627 return false;
3629 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3630 same size as the vector element being permuted. */
3631 mask_element_type = lang_hooks.types.type_for_mode
3632 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3633 mask_type = get_vectype_for_scalar_type (mask_element_type);
3634 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3635 vec_perm_builder mask (nunits, nunits, 1);
3636 mask.quick_grow (nunits);
3637 vec_perm_indices indices;
3639 /* Initialize the vect stmts of NODE to properly insert the generated
3640 stmts later. */
3641 if (! analyze_only)
3642 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3643 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3644 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3646 /* Generate permutation masks for every NODE. Number of masks for each NODE
3647 is equal to GROUP_SIZE.
3648 E.g., we have a group of three nodes with three loads from the same
3649 location in each node, and the vector size is 4. I.e., we have an
3650 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3651 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3652 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3655 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3656 The last mask is illegal since we assume two operands for the permute
3657 operation, and the mask element values can't be outside that range.
3658 Hence, the last mask must be converted into {2,5,5,5}.
3659 For the first two permutations we need the first and the second input
3660 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3661 we need the second and the third vectors: {b1,c1,a2,b2} and
3662 {c2,a3,b3,c3}. */
3664 int vect_stmts_counter = 0;
3665 int index = 0;
3666 int first_vec_index = -1;
3667 int second_vec_index = -1;
3668 bool noop_p = true;
3669 *n_perms = 0;
3671 for (unsigned int j = 0; j < const_vf; j++)
3673 for (int k = 0; k < group_size; k++)
3675 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3676 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3677 vec_index = i / nunits;
3678 mask_element = i % nunits;
3679 if (vec_index == first_vec_index
3680 || first_vec_index == -1)
3682 first_vec_index = vec_index;
3684 else if (vec_index == second_vec_index
3685 || second_vec_index == -1)
3687 second_vec_index = vec_index;
3688 mask_element += nunits;
3690 else
3692 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3695 "permutation requires at "
3696 "least three vectors ");
3697 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3698 stmt, 0);
3700 gcc_assert (analyze_only);
3701 return false;
3704 gcc_assert (mask_element >= 0
3705 && mask_element < 2 * nunits);
3706 if (mask_element != index)
3707 noop_p = false;
3708 mask[index++] = mask_element;
3710 if (index == nunits && !noop_p)
3712 indices.new_vector (mask, 2, nunits);
3713 if (!can_vec_perm_const_p (mode, indices))
3715 if (dump_enabled_p ())
3717 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3718 vect_location,
3719 "unsupported vect permute { ");
3720 for (i = 0; i < nunits; ++i)
3721 dump_printf (MSG_MISSED_OPTIMIZATION,
3722 HOST_WIDE_INT_PRINT_DEC " ", mask[i]);
3723 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3725 gcc_assert (analyze_only);
3726 return false;
3729 ++*n_perms;
	  if (index == nunits)
	    {
	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;

		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt = gimple_build_assign (perm_dest,
						       VEC_PERM_EXPR,
						       first_vec, second_vec,
						       mask_vec);
		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}

	      index = 0;
	      first_vec_index = -1;
	      second_vec_index = -1;
	      noop_p = true;
	    }
	}
    }

  return true;
}
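
/* Maps the scalar stmts of an already-vectorized SLP node to that node, so
   that nodes sharing the same scalar stmts are vectorized only once and
   their vector stmts reused.  */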
typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;
  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }

  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
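      /* E.g., for a group of alternating plus and minus stmts, MASK is now
	 {0, 1, 0, 1}: 0 selects the CODE0 variant and 1 the OCODE variant
	 of the corresponding group member.  */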
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
	      tree_vector_builder melts (mvectype, nunits, 1);
	      for (l = 0; l < nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();
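	      /* E.g., with GROUP_SIZE == 2, NUNITS == 4 and MASK == {0, 1}
		 this builds the selector {0, 5, 2, 7}: even lanes come from
		 V0, odd lanes from V1, i.e. a vec_merge of the two
		 variants.  */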
	      /* ???  Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */
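/* E.g., a call whose lhs _1 is no longer used in scalar form is rewritten
   to _1 = 0 of the appropriate type, keeping the SSA def valid until DCE
   removes it.  */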

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
	= new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      delete bst_map;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);
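
      /* Remove the now-redundant scalar stmts of the root store group;
	 roots that are not stores (no data reference) are left alone.  */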
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);
	}
    }

  return is_store;