Make vectorizable_load/store handle IFN_MASK_LOAD/STORE
[official-gcc.git] / gcc / tree-vect-slp.c
blob 2a6d9244109c3db676e822c18b1fccef818f6488
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
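/* For example (an illustration, not from the original sources): in a chain
   built from the accesses a[0], a[1], a[3], the GROUP_GAP of the third
   element is 2, so the place returned for a[3]'s stmt is 0 + 1 + 2 = 3,
   counting the unused slot for a[2].  */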

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
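          /* maps[0] leaves the operands alone, maps[1] swaps the two
             comparison operands (for *swap == 1) and maps[2] swaps the
             then/else arms (for *swap == 2, an inverted comparison).  */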
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          /* We must already have set a vector size by now.  */
          gcc_checking_assert (maybe_ne (current_vector_size, 0U));
          if (!current_vector_size.is_constant ())
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: invalid type of def "
                                   "for variable-length SLP ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return -1;
            }
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, poly_uint64 *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned HOST_WIDE_INT count;
      if (!nunits.is_constant (&count))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: different operations "
                             "not allowed with variable-length SLP.\n");
          return false;
        }
      vec_perm_builder sel (count, count, 1);
      for (i = 0; i < count; ++i)
        {
          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            elt += count;
          sel.quick_push (elt);
        }
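      /* For example (an illustration): with count == 4 and a group
         { PLUS, MINUS } this builds the mask { 0, 5, 2, 7 }, selecting
         even elements from the all-PLUS vector and odd elements from
         the all-MINUS vector.  */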
      vec_perm_indices indices (sel, 2, count);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits, vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;
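  /* Note stmt I moves to slot PERMUTATION[I]; e.g. (an illustration) a
     permutation { 1, 0 } simply swaps the two scalar stmts.  */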

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
              unsigned HOST_WIDE_INT nunits;
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              tree vectype = STMT_VINFO_VECTYPE (group_info);
              if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
                  || maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                             (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
      return false;

  return true;
}

/* Find the last store in SLP INSTANCE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost,
                         scalar_stmts_set_t* visited)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* If we already costed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
    return;

  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost, visited);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)
           ? VMAT_STRIDED_SLP
           : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, VLS_STORE,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            {
              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              unsigned n_perms;
              vect_transform_slp_perm_load (node, vNULL, NULL,
                                            ncopies_for_cost, instance, true,
                                            &n_perms);
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);
              unsigned assumed_nunits
                = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
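              /* E.g. (an illustration): with GROUP_SIZE == 8,
                 assumed_nunits == 4 and a permutation that only uses
                 elements 0..3, the second vector load is never used and
                 ncopies_for_cost drops to 1 below.  */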
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
              bitmap_clear (perm);
              for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
                bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
              ncopies_for_cost = 0;
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
                {
                  if (i % assumed_nunits == 0)
                    {
                      if (load_seen)
                        ncopies_for_cost++;
                      load_seen = false;
                    }
                  if (bitmap_bit_p (perm, i))
                    load_seen = true;
                }
              if (load_seen)
                ncopies_for_cost++;
              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                              + assumed_nunits - 1) / assumed_nunits);
              poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
              ncopies_for_cost *= estimated_poly_value (uf);
            }
          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,
                                body_cost_vec);
          return;
        }
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
                             (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
         to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
1868 /* Compute the cost for the SLP instance INSTANCE. */
1870 static void
1871 vect_analyze_slp_cost (slp_instance instance, void *data)
1873 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1874 unsigned ncopies_for_cost;
1875 stmt_info_for_cost *si;
1876 unsigned i;
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_NOTE, vect_location,
1880 "=== vect_analyze_slp_cost ===\n");
1882 /* Calculate the number of vector stmts to create based on the unrolling
1883 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1884 GROUP_SIZE / NUNITS otherwise. */
1885 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1886 slp_tree node = SLP_INSTANCE_TREE (instance);
1887 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1888 /* Get the estimated vectorization factor, which is always one for
1889 basic-block vectorization. */
1890 unsigned int assumed_vf;
1891 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1892 assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
1893 else
1894 assumed_vf = 1;
1895 /* For reductions look at a reduction operand in case the reduction
1896 operation is widening like DOT_PROD or SAD. */
1897 tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
1898 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1900 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1901 switch (gimple_assign_rhs_code (stmt))
1903 case DOT_PROD_EXPR:
1904 case SAD_EXPR:
1905 vectype_for_cost = get_vectype_for_scalar_type
1906 (TREE_TYPE (gimple_assign_rhs1 (stmt)));
1907 break;
1908 default:;
1911 unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
1912 ncopies_for_cost = (least_common_multiple (assumed_nunits,
1913 group_size * assumed_vf)
1914 / assumed_nunits);
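/* For example (hypothetical values): with GROUP_SIZE = 3, an assumed
   VF of 2 and ASSUMED_NUNITS = 4 this computes
   least_common_multiple (4, 3 * 2) / 4 = 12 / 4 = 3 copies of each
   vector stmt to cover the six scalars per unrolled group. */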
1916 prologue_cost_vec.create (10);
1917 body_cost_vec.create (10);
1918 scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
1919 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1920 &prologue_cost_vec, &body_cost_vec,
1921 ncopies_for_cost, visited);
1922 delete visited;
1924 /* Record the prologue costs, which were delayed until we were
1925 sure that SLP was successful. */
1926 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1928 struct _stmt_vec_info *stmt_info
1929 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1930 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1931 si->misalign, vect_prologue);
1934 /* Record the instance's instructions in the target cost model. */
1935 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1937 struct _stmt_vec_info *stmt_info
1938 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1939 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1940 si->misalign, vect_body);
1943 prologue_cost_vec.release ();
1944 body_cost_vec.release ();
1947 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1948 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1949 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1950 containing the remainder.
1951 Return the first stmt in the second group. */
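/* A sketch of the effect on a hypothetical group of four consecutive
   stores split with GROUP1_SIZE == 2:

     a[0] = x0; a[1] = x1; a[2] = x2; a[3] = x3;

   becomes one group { a[0], a[1] } and a second group { a[2], a[3] },
   with the GROUP_GAP of each group adjusted to skip over the other. */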
1953 static gimple *
1954 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1956 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1957 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1958 gcc_assert (group1_size > 0);
1959 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1960 gcc_assert (group2_size > 0);
1961 GROUP_SIZE (first_vinfo) = group1_size;
1963 gimple *stmt = first_stmt;
1964 for (unsigned i = group1_size; i > 1; i--)
1966 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1967 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1969 /* STMT is now the last element of the first group. */
1970 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1971 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1973 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1974 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1976 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1977 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1980 /* For the second group, the GROUP_GAP is that before the original group,
1981 plus skipping over the whole first group. */
1982 GROUP_GAP (vinfo_for_stmt (group2)) =
1983 GROUP_GAP (first_vinfo) + group1_size;
1985 /* GROUP_GAP of the first group now has to skip over the second group too. */
1986 GROUP_GAP (first_vinfo) += group2_size;
1988 if (dump_enabled_p ())
1989 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1990 group1_size, group2_size);
1992 return group2;
1995 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1996 statements and a vector of NUNITS elements. */
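/* For example (hypothetical values): with NUNITS = 4 and GROUP_SIZE = 6
   the common multiple is 12, so the unrolling factor is 12 / 6 = 2;
   two copies of the group fill exactly three 4-element vectors. */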
1998 static poly_uint64
1999 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2001 return exact_div (common_multiple (nunits, group_size), group_size);
2004 /* Analyze an SLP instance starting from a group of grouped stores. Call
2005 vect_build_slp_tree to build a tree of packed stmts if possible.
2006 Return FALSE if it's impossible to SLP any stmt in the loop. */
2008 static bool
2009 vect_analyze_slp_instance (vec_info *vinfo,
2010 gimple *stmt, unsigned max_tree_size)
2012 slp_instance new_instance;
2013 slp_tree node;
2014 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
2015 tree vectype, scalar_type = NULL_TREE;
2016 gimple *next;
2017 unsigned int i;
2018 vec<slp_tree> loads;
2019 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2020 vec<gimple *> scalar_stmts;
2022 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2024 if (dr)
2026 scalar_type = TREE_TYPE (DR_REF (dr));
2027 vectype = get_vectype_for_scalar_type (scalar_type);
2029 else
2031 gcc_assert (is_a <loop_vec_info> (vinfo));
2032 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2035 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
2037 else
2039 gcc_assert (is_a <loop_vec_info> (vinfo));
2040 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2041 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2044 if (!vectype)
2046 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2049 "Build SLP failed: unsupported data-type ");
2050 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
2051 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2054 return false;
2056 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2058 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2059 scalar_stmts.create (group_size);
2060 next = stmt;
2061 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2063 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2064 while (next)
2066 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2067 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2068 scalar_stmts.safe_push (
2069 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2070 else
2071 scalar_stmts.safe_push (next);
2072 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2074 /* Mark the first element of the reduction chain as reduction to properly
2075 transform the node. In the reduction analysis phase only the last
2076 element of the chain is marked as reduction. */
2077 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2078 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2080 else
2082 /* Collect reduction statements. */
2083 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2084 for (i = 0; reductions.iterate (i, &next); i++)
2085 scalar_stmts.safe_push (next);
2088 loads.create (group_size);
2090 /* Build the tree for the SLP instance. */
2091 bool *matches = XALLOCAVEC (bool, group_size);
2092 unsigned npermutes = 0;
2093 bst_fail = new scalar_stmts_set_t ();
2094 poly_uint64 max_nunits = nunits;
2095 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2096 &max_nunits, &loads, matches, &npermutes,
2097 NULL, max_tree_size);
2098 delete bst_fail;
2099 if (node != NULL)
2101 /* Calculate the unrolling factor based on the smallest type. */
2102 poly_uint64 unrolling_factor
2103 = calculate_unrolling_factor (max_nunits, group_size);
2105 if (maybe_ne (unrolling_factor, 1U)
2106 && is_a <bb_vec_info> (vinfo))
2108 unsigned HOST_WIDE_INT const_max_nunits;
2109 if (!max_nunits.is_constant (&const_max_nunits)
2110 || const_max_nunits > group_size)
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2114 "Build SLP failed: store group "
2115 "size not a multiple of the vector size "
2116 "in basic block SLP\n");
2117 vect_free_slp_tree (node);
2118 loads.release ();
2119 return false;
2121 /* Fatal mismatch. */
2122 matches[group_size / const_max_nunits * const_max_nunits] = false;
2123 vect_free_slp_tree (node);
2124 loads.release ();
2126 else
2128 /* Create a new SLP instance. */
2129 new_instance = XNEW (struct _slp_instance);
2130 SLP_INSTANCE_TREE (new_instance) = node;
2131 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2132 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2133 SLP_INSTANCE_LOADS (new_instance) = loads;
2135 /* Compute the load permutation. */
2136 slp_tree load_node;
2137 bool loads_permuted = false;
2138 FOR_EACH_VEC_ELT (loads, i, load_node)
2140 vec<unsigned> load_permutation;
2141 int j;
2142 gimple *load, *first_stmt;
2143 bool this_load_permuted = false;
2144 load_permutation.create (group_size);
2145 first_stmt = GROUP_FIRST_ELEMENT
2146 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2147 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2149 int load_place = vect_get_place_in_interleaving_chain
2150 (load, first_stmt);
2151 gcc_assert (load_place != -1);
2152 if (load_place != j)
2153 this_load_permuted = true;
2154 load_permutation.safe_push (load_place);
2156 if (!this_load_permuted
2157 /* The load requires permutation when unrolling exposes
2158 a gap either because the group is larger than the SLP
2159 group-size or because there is a gap between the groups. */
2160 && (known_eq (unrolling_factor, 1U)
2161 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2162 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2164 load_permutation.release ();
2165 continue;
2167 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2168 loads_permuted = true;
2171 if (loads_permuted)
2173 if (!vect_supported_load_permutation_p (new_instance))
2175 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "Build SLP failed: unsupported load "
2179 "permutation ");
2180 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2181 TDF_SLIM, stmt, 0);
2183 vect_free_slp_instance (new_instance);
2184 return false;
2188 /* If the loads and stores can be handled with load/store-lanes
2189 instructions, do not generate this SLP instance. */
2190 if (is_a <loop_vec_info> (vinfo)
2191 && loads_permuted
2192 && dr && vect_store_lanes_supported (vectype, group_size))
2194 slp_tree load_node;
2195 FOR_EACH_VEC_ELT (loads, i, load_node)
2197 gimple *first_stmt = GROUP_FIRST_ELEMENT
2198 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2199 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2200 /* Use SLP for strided accesses (or if we
2201 can't load-lanes). */
2202 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2203 || ! vect_load_lanes_supported
2204 (STMT_VINFO_VECTYPE (stmt_vinfo),
2205 GROUP_SIZE (stmt_vinfo)))
2206 break;
2208 if (i == loads.length ())
2210 if (dump_enabled_p ())
2211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2212 "Built SLP cancelled: can use "
2213 "load/store-lanes\n");
2214 vect_free_slp_instance (new_instance);
2215 return false;
2219 vinfo->slp_instances.safe_push (new_instance);
2221 if (dump_enabled_p ())
2223 dump_printf_loc (MSG_NOTE, vect_location,
2224 "Final SLP tree for instance:\n");
2225 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2228 return true;
2231 else
2233 /* Failed to SLP. */
2234 /* Free the allocated memory. */
2235 scalar_stmts.release ();
2236 loads.release ();
2239 /* For basic block SLP, try to break the group up into multiples of the
2240 vector size. */
2241 unsigned HOST_WIDE_INT const_nunits;
2242 if (is_a <bb_vec_info> (vinfo)
2243 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2244 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2245 && nunits.is_constant (&const_nunits))
2247 /* We consider breaking the group only on VF boundaries from the existing
2248 start. */
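/* For instance (hypothetical sizes): with CONST_NUNITS == 4 and the
   first mismatch at element 10 of a 16-store group, GROUP1_SIZE becomes
   10 & ~3 == 8; we split off the first eight stores, then skip the
   vector containing the mismatch and retry the remaining stores. */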
2249 for (i = 0; i < group_size; i++)
2250 if (!matches[i]) break;
2252 if (i >= const_nunits && i < group_size)
2254 /* Split into two groups at the first vector boundary before i. */
2255 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2256 unsigned group1_size = i & ~(const_nunits - 1);
2258 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2259 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2260 /* If the first non-match was in the middle of a vector,
2261 skip the rest of that vector. */
2262 if (group1_size < i)
2264 i = group1_size + const_nunits;
2265 if (i < group_size)
2266 rest = vect_split_slp_store_group (rest, const_nunits);
2268 if (i < group_size)
2269 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2270 return res;
2272 /* Even though the first vector did not all match, we might be able to SLP
2273 (some) of the remainder. FORNOW ignore this possibility. */
2276 return false;
2280 /* Check if there are stmts in the loop that can be vectorized using SLP.
2281 Build SLP trees of packed scalar stmts if SLP is possible. */
2283 bool
2284 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2286 unsigned int i;
2287 gimple *first_element;
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2292 /* Find SLP sequences starting from groups of grouped stores. */
2293 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2294 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2296 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2298 if (loop_vinfo->reduction_chains.length () > 0)
2300 /* Find SLP sequences starting from reduction chains. */
2301 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2302 if (! vect_analyze_slp_instance (vinfo, first_element,
2303 max_tree_size))
2305 /* Dissolve reduction chain group. */
2306 gimple *next, *stmt = first_element;
2307 while (stmt)
2309 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2310 next = GROUP_NEXT_ELEMENT (vinfo);
2311 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2312 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2313 stmt = next;
2315 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2316 = vect_internal_def;
2320 /* Find SLP sequences starting from groups of reductions. */
2321 if (loop_vinfo->reductions.length () > 1)
2322 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2323 max_tree_size);
2326 return true;
2330 /* For each possible SLP instance decide whether to SLP it and calculate overall
2331 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2332 least one instance. */
2334 bool
2335 vect_make_slp_decision (loop_vec_info loop_vinfo)
2337 unsigned int i;
2338 poly_uint64 unrolling_factor = 1;
2339 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2340 slp_instance instance;
2341 int decided_to_slp = 0;
2343 if (dump_enabled_p ())
2344 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2345 "\n");
2347 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2349 /* FORNOW: SLP if you can. */
2350 /* All unroll factors have the form current_vector_size * X for some
2351 rational X, so they must have a common multiple. */
2352 unrolling_factor
2353 = force_common_multiple (unrolling_factor,
2354 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2356 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2357 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2358 loop-based vectorization. Such stmts will be marked as HYBRID. */
2359 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2360 decided_to_slp++;
2363 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2365 if (decided_to_slp && dump_enabled_p ())
2367 dump_printf_loc (MSG_NOTE, vect_location,
2368 "Decided to SLP %d instances. Unrolling factor ",
2369 decided_to_slp);
2370 dump_dec (MSG_NOTE, unrolling_factor);
2371 dump_printf (MSG_NOTE, "\n");
2374 return (decided_to_slp > 0);
2378 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2379 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
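/* As an illustration: when a stmt belonging to an SLP instance computes
   a value that is also consumed by a stmt that will be loop-vectorized,
   the def must be available in both forms, so the defining stmt is
   marked hybrid here. */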
2381 static void
2382 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2384 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2385 imm_use_iterator imm_iter;
2386 gimple *use_stmt;
2387 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2388 slp_tree child;
2389 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2390 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2391 int j;
2393 /* Propagate hybrid down the SLP tree. */
2394 if (stype == hybrid)
2396 else if (HYBRID_SLP_STMT (stmt_vinfo))
2397 stype = hybrid;
2398 else
2400 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2401 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2402 /* If we get a pattern stmt here we have to use the LHS of the
2403 original stmt for immediate uses. */
2404 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2405 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2406 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2407 tree def;
2408 if (gimple_code (stmt) == GIMPLE_PHI)
2409 def = gimple_phi_result (stmt);
2410 else
2411 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2412 if (def)
2413 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2415 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2416 continue;
2417 use_vinfo = vinfo_for_stmt (use_stmt);
2418 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2419 && STMT_VINFO_RELATED_STMT (use_vinfo))
2420 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2421 if (!STMT_SLP_TYPE (use_vinfo)
2422 && (STMT_VINFO_RELEVANT (use_vinfo)
2423 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2424 && !(gimple_code (use_stmt) == GIMPLE_PHI
2425 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2427 if (dump_enabled_p ())
2429 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2430 "def in non-SLP stmt: ");
2431 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2433 stype = hybrid;
2438 if (stype == hybrid
2439 && !HYBRID_SLP_STMT (stmt_vinfo))
2441 if (dump_enabled_p ())
2443 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2444 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2446 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2449 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2450 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2451 vect_detect_hybrid_slp_stmts (child, i, stype);
2454 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2456 static tree
2457 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2459 walk_stmt_info *wi = (walk_stmt_info *)data;
2460 struct loop *loopp = (struct loop *)wi->info;
2462 if (wi->is_lhs)
2463 return NULL_TREE;
2465 if (TREE_CODE (*tp) == SSA_NAME
2466 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2468 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2469 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2470 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2472 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2475 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2477 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2481 return NULL_TREE;
2484 static tree
2485 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2486 walk_stmt_info *)
2488 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2489 /* If the stmt is in a SLP instance then this isn't a reason
2490 to mark use definitions in other SLP instances as hybrid. */
2491 if (! STMT_SLP_TYPE (use_vinfo)
2492 && (STMT_VINFO_RELEVANT (use_vinfo)
2493 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2494 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2495 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2497 else
2498 *handled = true;
2499 return NULL_TREE;
2502 /* Find stmts that must be both vectorized and SLPed. */
2504 void
2505 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2507 unsigned int i;
2508 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2509 slp_instance instance;
2511 if (dump_enabled_p ())
2512 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2513 "\n");
2515 /* First walk all pattern stmts in the loop and mark the defs of their
2516 uses as hybrid, because immediate uses in pattern stmts are not recorded. */
2517 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2519 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2520 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2521 gsi_next (&gsi))
2523 gimple *stmt = gsi_stmt (gsi);
2524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2525 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2527 walk_stmt_info wi;
2528 memset (&wi, 0, sizeof (wi));
2529 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2530 gimple_stmt_iterator gsi2
2531 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2532 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2533 vect_detect_hybrid_slp_1, &wi);
2534 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2535 vect_detect_hybrid_slp_2,
2536 vect_detect_hybrid_slp_1, &wi);
2541 /* Then walk the SLP instance trees marking stmts with uses in
2542 non-SLP stmts as hybrid, also propagating hybrid down the
2543 SLP tree, collecting the above info on-the-fly. */
2544 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2546 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2547 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2548 i, pure_slp);
2553 /* Initialize a bb_vec_info struct for the statements between
2554 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2556 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2557 gimple_stmt_iterator region_end_in)
2558 : vec_info (vec_info::bb, init_cost (NULL)),
2559 bb (gsi_bb (region_begin_in)),
2560 region_begin (region_begin_in),
2561 region_end (region_end_in)
2563 gimple_stmt_iterator gsi;
2565 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2566 gsi_next (&gsi))
2568 gimple *stmt = gsi_stmt (gsi);
2569 gimple_set_uid (stmt, 0);
2570 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2573 bb->aux = this;
2577 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2578 stmts in the basic block. */
2580 _bb_vec_info::~_bb_vec_info ()
2582 for (gimple_stmt_iterator si = region_begin;
2583 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2585 gimple *stmt = gsi_stmt (si);
2586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2588 if (stmt_info)
2589 /* Free stmt_vec_info. */
2590 free_stmt_vec_info (stmt);
2592 /* Reset region marker. */
2593 gimple_set_uid (stmt, -1);
2596 bb->aux = NULL;
2600 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2601 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2603 Return true if the operations are supported. */
2605 static bool
2606 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2607 slp_instance node_instance)
2609 bool dummy;
2610 int i, j;
2611 gimple *stmt;
2612 slp_tree child;
2614 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2615 return true;
2617 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2618 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2619 return false;
2621 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2622 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2623 gcc_assert (stmt_info);
2624 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2626 /* For BB vectorization vector types are assigned here.
2627 Memory accesses already got their vector type assigned
2628 in vect_analyze_data_refs. */
2629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2630 if (bb_vinfo
2631 && ! STMT_VINFO_DATA_REF (stmt_info))
2633 gcc_assert (PURE_SLP_STMT (stmt_info));
2635 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2636 if (dump_enabled_p ())
2638 dump_printf_loc (MSG_NOTE, vect_location,
2639 "get vectype for scalar type: ");
2640 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2641 dump_printf (MSG_NOTE, "\n");
2644 tree vectype = get_vectype_for_scalar_type (scalar_type);
2645 if (!vectype)
2647 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 "not SLPed: unsupported data-type ");
2651 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2652 scalar_type);
2653 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2655 return false;
2658 if (dump_enabled_p ())
2660 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2661 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2662 dump_printf (MSG_NOTE, "\n");
2665 gimple *sstmt;
2666 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2667 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2670 /* Calculate the number of vector statements to be created for the
2671 scalar stmts in this node. For SLP reductions it is equal to the
2672 number of vector statements in the children (which has already been
2673 calculated by the recursive call). Otherwise it is the number of
2674 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2675 VF divided by the number of elements in a vector. */
2676 if (GROUP_FIRST_ELEMENT (stmt_info)
2677 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2678 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2679 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2680 else
2682 poly_uint64 vf;
2683 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2684 vf = loop_vinfo->vectorization_factor;
2685 else
2686 vf = 1;
2687 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2690 = vect_get_num_vectors (vf * group_size, vectype);
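/* A worked example with made-up numbers: for VF = 4, GROUP_SIZE = 2 and
   a vectype with 8 elements, vect_get_num_vectors (4 * 2, vectype)
   yields SLP_TREE_NUMBER_OF_VEC_STMTS == 1. */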
2693 /* Push SLP node def-type to stmt operands. */
2694 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2695 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2696 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2697 = SLP_TREE_DEF_TYPE (child);
2698 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2699 /* Restore def-types. */
2700 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2701 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2702 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2703 = vect_internal_def;
2704 if (! res)
2705 return false;
2707 return true;
2711 /* Analyze statements in SLP instances of VINFO. Return true if the
2712 operations are supported. */
2714 bool
2715 vect_slp_analyze_operations (vec_info *vinfo)
2717 slp_instance instance;
2718 int i;
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_NOTE, vect_location,
2722 "=== vect_slp_analyze_operations ===\n");
2724 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2726 if (!vect_slp_analyze_node_operations (vinfo,
2727 SLP_INSTANCE_TREE (instance),
2728 instance))
2730 dump_printf_loc (MSG_NOTE, vect_location,
2731 "removing SLP instance operations starting from: ");
2732 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2733 SLP_TREE_SCALAR_STMTS
2734 (SLP_INSTANCE_TREE (instance))[0], 0);
2735 vect_free_slp_instance (instance);
2736 vinfo->slp_instances.ordered_remove (i);
2738 else
2740 /* Compute the costs of the SLP instance. */
2741 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2742 i++;
2746 return !vinfo->slp_instances.is_empty ();
2750 /* Compute the scalar cost of the SLP node NODE and its children
2751 and return it. Do not account defs that are marked in LIFE and
2752 update LIFE according to uses of NODE. */
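/* For example: if a def computed inside the SLP node also has a use
   outside the vectorized region, its scalar stmt stays live and is left
   out of the scalar cost, which makes vectorization look comparatively
   more expensive. */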
2754 static unsigned
2755 vect_bb_slp_scalar_cost (basic_block bb,
2756 slp_tree node, vec<bool, va_heap> *life)
2758 unsigned scalar_cost = 0;
2759 unsigned i;
2760 gimple *stmt;
2761 slp_tree child;
2763 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2765 unsigned stmt_cost;
2766 ssa_op_iter op_iter;
2767 def_operand_p def_p;
2768 stmt_vec_info stmt_info;
2770 if ((*life)[i])
2771 continue;
2773 /* If there is a non-vectorized use of the defs then the scalar
2774 stmt is kept live, in which case we do not account it or any
2775 required defs in the SLP children in the scalar cost. This
2776 way we make the vectorization more costly when compared to
2777 the scalar cost. */
2778 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2780 imm_use_iterator use_iter;
2781 gimple *use_stmt;
2782 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2783 if (!is_gimple_debug (use_stmt)
2784 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2785 use_stmt)
2786 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2788 (*life)[i] = true;
2789 BREAK_FROM_IMM_USE_STMT (use_iter);
2792 if ((*life)[i])
2793 continue;
2795 /* Count scalar stmts only once. */
2796 if (gimple_visited_p (stmt))
2797 continue;
2798 gimple_set_visited (stmt, true);
2800 stmt_info = vinfo_for_stmt (stmt);
2801 if (STMT_VINFO_DATA_REF (stmt_info))
2803 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2804 stmt_cost = vect_get_stmt_cost (scalar_load);
2805 else
2806 stmt_cost = vect_get_stmt_cost (scalar_store);
2808 else
2809 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2811 scalar_cost += stmt_cost;
2814 auto_vec<bool, 20> subtree_life;
2815 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2817 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2819 /* Do not directly pass LIFE to the recursive call, copy it to
2820 confine changes in the callee to the current child/subtree. */
2821 subtree_life.safe_splice (*life);
2822 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2823 subtree_life.truncate (0);
2827 return scalar_cost;
2830 /* Check if vectorization of the basic block is profitable. */
2832 static bool
2833 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2835 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2836 slp_instance instance;
2837 int i;
2838 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2839 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2841 /* Calculate scalar cost. */
2842 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2844 auto_vec<bool, 20> life;
2845 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2846 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2847 SLP_INSTANCE_TREE (instance),
2848 &life);
2851 /* Unset visited flag. */
2852 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2853 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2854 gimple_set_visited (gsi_stmt (gsi), false);
2856 /* Complete the target-specific cost calculation. */
2857 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2858 &vec_inside_cost, &vec_epilogue_cost);
2860 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2862 if (dump_enabled_p ())
2864 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2865 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2866 vec_inside_cost);
2867 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2868 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2869 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2872 /* Vectorization is profitable if its cost is less than the cost of the
2873 scalar version. Note that we err on the vector side for equal cost because
2874 the cost estimate is otherwise quite pessimistic (constant uses are
2875 free on the scalar side but cost a load on the vector side for
2876 example). */
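/* E.g., with a hypothetical vector prologue cost of 2, vector inside
   cost of 6 and scalar cost of 8 the two sides tie, and per the above
   we still vectorize. */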
2877 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2878 return false;
2880 return true;
2883 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2884 if so and sets FATAL to true if failure is independent of
2885 current_vector_size. */
2887 static bb_vec_info
2888 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2889 gimple_stmt_iterator region_end,
2890 vec<data_reference_p> datarefs, int n_stmts,
2891 bool &fatal)
2893 bb_vec_info bb_vinfo;
2894 slp_instance instance;
2895 int i;
2896 poly_uint64 min_vf = 2;
2898 /* The first group of checks is independent of the vector size. */
2899 fatal = true;
2901 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2903 if (dump_enabled_p ())
2904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2905 "not vectorized: too many instructions in "
2906 "basic block.\n");
2907 free_data_refs (datarefs);
2908 return NULL;
2911 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2912 if (!bb_vinfo)
2913 return NULL;
2915 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2917 /* Analyze the data references. */
2919 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2923 "not vectorized: unhandled data-ref in basic "
2924 "block.\n");
2926 delete bb_vinfo;
2927 return NULL;
2930 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2932 if (dump_enabled_p ())
2933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2934 "not vectorized: not enough data-refs in "
2935 "basic block.\n");
2937 delete bb_vinfo;
2938 return NULL;
2941 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2943 if (dump_enabled_p ())
2944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2945 "not vectorized: unhandled data access in "
2946 "basic block.\n");
2948 delete bb_vinfo;
2949 return NULL;
2952 /* If there are no grouped stores in the region there is no need
2953 to continue with pattern recog as vect_analyze_slp will fail
2954 anyway. */
2955 if (bb_vinfo->grouped_stores.is_empty ())
2957 if (dump_enabled_p ())
2958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2959 "not vectorized: no grouped stores in "
2960 "basic block.\n");
2962 delete bb_vinfo;
2963 return NULL;
2966 /* While the rest of the analysis below depends on the vector size in some way. */
2967 fatal = false;
2969 vect_pattern_recog (bb_vinfo);
2971 /* Check the SLP opportunities in the basic block, analyze and build SLP
2972 trees. */
2973 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2975 if (dump_enabled_p ())
2977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2978 "Failed to SLP the basic block.\n");
2979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2980 "not vectorized: failed to find SLP opportunities "
2981 "in basic block.\n");
2984 delete bb_vinfo;
2985 return NULL;
2988 vect_record_base_alignments (bb_vinfo);
2990 /* Analyze and verify the alignment of data references and the
2991 dependence in the SLP instances. */
2992 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2994 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2995 || ! vect_slp_analyze_instance_dependence (instance))
2997 dump_printf_loc (MSG_NOTE, vect_location,
2998 "removing SLP instance operations starting from: ");
2999 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3000 SLP_TREE_SCALAR_STMTS
3001 (SLP_INSTANCE_TREE (instance))[0], 0);
3002 vect_free_slp_instance (instance);
3003 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3004 continue;
3007 /* Mark all the statements that we want to vectorize as pure SLP and
3008 relevant. */
3009 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3010 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3012 i++;
3014 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3016 delete bb_vinfo;
3017 return NULL;
3020 if (!vect_slp_analyze_operations (bb_vinfo))
3022 if (dump_enabled_p ())
3023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3024 "not vectorized: bad operation in basic block.\n");
3026 delete bb_vinfo;
3027 return NULL;
3030 /* Cost model: check if the vectorization is worthwhile. */
3031 if (!unlimited_cost_model (NULL)
3032 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3034 if (dump_enabled_p ())
3035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3036 "not vectorized: vectorization is not "
3037 "profitable.\n");
3039 delete bb_vinfo;
3040 return NULL;
3043 if (dump_enabled_p ())
3044 dump_printf_loc (MSG_NOTE, vect_location,
3045 "Basic block will be vectorized using SLP\n");
3047 return bb_vinfo;
3051 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3052 true if anything in the basic-block was vectorized. */
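/* A sketch of the iteration below: if analysis of a region fails
   non-fatally, the same region is re-analyzed with the next
   target-supported vector size (e.g. retrying with 16-byte vectors
   after 32-byte vectors failed) before skipping the offending stmt. */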
3054 bool
3055 vect_slp_bb (basic_block bb)
3057 bb_vec_info bb_vinfo;
3058 gimple_stmt_iterator gsi;
3059 bool any_vectorized = false;
3060 auto_vector_sizes vector_sizes;
3062 if (dump_enabled_p ())
3063 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
3065 /* Autodetect first vector size we try. */
3066 current_vector_size = 0;
3067 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3068 unsigned int next_size = 0;
3070 gsi = gsi_start_bb (bb);
3072 poly_uint64 autodetected_vector_size = 0;
3073 while (1)
3075 if (gsi_end_p (gsi))
3076 break;
3078 gimple_stmt_iterator region_begin = gsi;
3079 vec<data_reference_p> datarefs = vNULL;
3080 int insns = 0;
3082 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3084 gimple *stmt = gsi_stmt (gsi);
3085 if (is_gimple_debug (stmt))
3086 continue;
3087 insns++;
3089 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3090 vect_location = gimple_location (stmt);
3092 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3093 break;
3096 /* Skip leading unhandled stmts. */
3097 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3099 gsi_next (&gsi);
3100 continue;
3103 gimple_stmt_iterator region_end = gsi;
3105 bool vectorized = false;
3106 bool fatal = false;
3107 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3108 datarefs, insns, fatal);
3109 if (bb_vinfo
3110 && dbg_cnt (vect_slp))
3112 if (dump_enabled_p ())
3113 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3115 vect_schedule_slp (bb_vinfo);
3117 if (dump_enabled_p ())
3118 dump_printf_loc (MSG_NOTE, vect_location,
3119 "basic block part vectorized\n");
3121 vectorized = true;
3123 delete bb_vinfo;
3125 any_vectorized |= vectorized;
3127 if (next_size == 0)
3128 autodetected_vector_size = current_vector_size;
3130 if (next_size < vector_sizes.length ()
3131 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3132 next_size += 1;
3134 if (vectorized
3135 || next_size == vector_sizes.length ()
3136 || known_eq (current_vector_size, 0U)
3137 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3138 vector sizes will fail do not bother iterating. */
3139 || fatal)
3141 if (gsi_end_p (region_end))
3142 break;
3144 /* Skip the unhandled stmt. */
3145 gsi_next (&gsi);
3147 /* And reset vector sizes. */
3148 current_vector_size = 0;
3149 next_size = 0;
3151 else
3153 /* Try the next biggest vector size. */
3154 current_vector_size = vector_sizes[next_size++];
3155 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_NOTE, vect_location,
3158 "***** Re-trying analysis with "
3159 "vector size ");
3160 dump_dec (MSG_NOTE, current_vector_size);
3161 dump_printf (MSG_NOTE, "\n");
3164 /* Start over. */
3165 gsi = region_begin;
3169 return any_vectorized;
3173 /* Return true if the vector type of the boolean constant that is operand
3174 number OPNUM of statement STMT is a boolean vector. */
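/* An illustrative case: for a comparison MASK = A < B where A is the
   invariant operand, the decision is based on B's vector type; only if
   B itself vectorizes to a boolean vector does A need one too. */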
3176 static bool
3177 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3179 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3180 enum tree_code code = gimple_expr_code (stmt);
3181 tree op, vectype;
3182 gimple *def_stmt;
3183 enum vect_def_type dt;
3185 /* For comparisons and COND_EXPR the type is chosen depending
3186 on the other comparison operand. */
3187 if (TREE_CODE_CLASS (code) == tcc_comparison)
3189 if (opnum)
3190 op = gimple_assign_rhs1 (stmt);
3191 else
3192 op = gimple_assign_rhs2 (stmt);
3194 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3195 &dt, &vectype))
3196 gcc_unreachable ();
3198 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3201 if (code == COND_EXPR)
3203 tree cond = gimple_assign_rhs1 (stmt);
3205 if (TREE_CODE (cond) == SSA_NAME)
3206 op = cond;
3207 else if (opnum)
3208 op = TREE_OPERAND (cond, 1);
3209 else
3210 op = TREE_OPERAND (cond, 0);
3212 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3213 &dt, &vectype))
3214 gcc_unreachable ();
3216 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3219 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3223 /* For constant and loop invariant defs of SLP_NODE this function returns
3224 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3225 OP_NUM determines which operand of the RHS of the scalar stmts we gather
3226 defs for. NUMBER_OF_VECTORS is the number of vector defs to create. */
3230 static void
3231 vect_get_constant_vectors (tree op, slp_tree slp_node,
3232 vec<tree> *vec_oprnds,
3233 unsigned int op_num, unsigned int number_of_vectors)
3235 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3236 gimple *stmt = stmts[0];
3237 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3238 unsigned nunits;
3239 tree vec_cst;
3240 unsigned j, number_of_places_left_in_vector;
3241 tree vector_type;
3242 tree vop;
3243 int group_size = stmts.length ();
3244 unsigned int vec_num, i;
3245 unsigned number_of_copies = 1;
3246 vec<tree> voprnds;
3247 voprnds.create (number_of_vectors);
3248 bool constant_p, is_store;
3249 tree neutral_op = NULL;
3250 enum tree_code code = gimple_expr_code (stmt);
3251 gimple_seq ctor_seq = NULL;
3253 /* Check if vector type is a boolean vector. */
3254 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3255 && vect_mask_constant_operand_p (stmt, op_num))
3256 vector_type
3257 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3258 else
3259 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3260 /* Enforced by vect_get_and_check_slp_defs. */
3261 nunits = TYPE_VECTOR_SUBPARTS (vector_type).to_constant ();
3263 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3265 is_store = true;
3266 op = gimple_assign_rhs1 (stmt);
3268 else
3269 is_store = false;
3271 gcc_assert (op);
3273 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3274 created vectors. It is greater than 1 if unrolling is performed.
3276 For example, we have two scalar operands, s1 and s2 (e.g., group of
3277 strided accesses of size two), while NUNITS is four (i.e., four scalars
3278 of this type can be packed in a vector). The output vector will contain
3279 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3280 will be 2).
3282 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3283 containing the operands.
3285 For example, NUNITS is four as before, and the group size is 8
3286 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3287 {s5, s6, s7, s8}. */
3289 number_of_copies = nunits * number_of_vectors / group_size;
3291 number_of_places_left_in_vector = nunits;
3292 constant_p = true;
3293 tree_vector_builder elts (vector_type, nunits, 1);
3294 elts.quick_grow (nunits);
3295 bool place_after_defs = false;
3296 for (j = 0; j < number_of_copies; j++)
3298 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3300 if (is_store)
3301 op = gimple_assign_rhs1 (stmt);
3302 else
3304 switch (code)
3306 case COND_EXPR:
3308 tree cond = gimple_assign_rhs1 (stmt);
3309 if (TREE_CODE (cond) == SSA_NAME)
3310 op = gimple_op (stmt, op_num + 1);
3311 else if (op_num == 0 || op_num == 1)
3312 op = TREE_OPERAND (cond, op_num);
3313 else
3315 if (op_num == 2)
3316 op = gimple_assign_rhs2 (stmt);
3317 else
3318 op = gimple_assign_rhs3 (stmt);
3321 break;
3323 case CALL_EXPR:
3324 op = gimple_call_arg (stmt, op_num);
3325 break;
3327 case LSHIFT_EXPR:
3328 case RSHIFT_EXPR:
3329 case LROTATE_EXPR:
3330 case RROTATE_EXPR:
3331 op = gimple_op (stmt, op_num + 1);
3332 /* Unlike the other binary operators, shifts/rotates take a
3333 shift count of type int rather than of the same type as
3334 the lhs, so make sure the scalar has the right type if
3335 we are dealing with vectors of
3336 long long/long/short/char. */
3337 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3338 op = fold_convert (TREE_TYPE (vector_type), op);
3339 break;
3341 default:
3342 op = gimple_op (stmt, op_num + 1);
3343 break;
3347 /* Create 'vect_ = {op0,op1,...,opn}'. */
3348 number_of_places_left_in_vector--;
3349 tree orig_op = op;
3350 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3352 if (CONSTANT_CLASS_P (op))
3354 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3356 /* Can't use VIEW_CONVERT_EXPR for booleans because
3357 of possibly different sizes of scalar value and
3358 vector element. */
3359 if (integer_zerop (op))
3360 op = build_int_cst (TREE_TYPE (vector_type), 0);
3361 else if (integer_onep (op))
3362 op = build_all_ones_cst (TREE_TYPE (vector_type));
3363 else
3364 gcc_unreachable ();
3366 else
3367 op = fold_unary (VIEW_CONVERT_EXPR,
3368 TREE_TYPE (vector_type), op);
3369 gcc_assert (op && CONSTANT_CLASS_P (op));
3371 else
3373 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3374 gimple *init_stmt;
3375 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3377 tree true_val
3378 = build_all_ones_cst (TREE_TYPE (vector_type));
3379 tree false_val
3380 = build_zero_cst (TREE_TYPE (vector_type));
3381 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3382 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3383 op, true_val,
3384 false_val);
3386 else
3388 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3389 op);
3390 init_stmt
3391 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3392 op);
3394 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3395 op = new_temp;
3398 elts[number_of_places_left_in_vector] = op;
3399 if (!CONSTANT_CLASS_P (op))
3400 constant_p = false;
3401 if (TREE_CODE (orig_op) == SSA_NAME
3402 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3403 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3404 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3405 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3406 place_after_defs = true;
3408 if (number_of_places_left_in_vector == 0)
3410 if (constant_p)
3411 vec_cst = elts.build ();
3412 else
3414 vec<constructor_elt, va_gc> *v;
3415 unsigned k;
3416 vec_alloc (v, nunits);
3417 for (k = 0; k < nunits; ++k)
3418 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3419 vec_cst = build_constructor (vector_type, v);
3421 tree init;
3422 gimple_stmt_iterator gsi;
3423 if (place_after_defs)
3425 gsi = gsi_for_stmt
3426 (vect_find_last_scalar_stmt_in_slp (slp_node));
3427 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3429 else
3430 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3431 if (ctor_seq != NULL)
3433 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3434 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3435 GSI_SAME_STMT);
3436 ctor_seq = NULL;
3438 voprnds.quick_push (init);
3439 place_after_defs = false;
3440 number_of_places_left_in_vector = nunits;
3441 constant_p = true;
3442 elts.new_vector (vector_type, nunits, 1);
3443 elts.quick_grow (nunits);
3448 /* Since the vectors are created in reverse order, reverse their
3449 order here. */
3450 vec_num = voprnds.length ();
3451 for (j = vec_num; j != 0; j--)
3453 vop = voprnds[j - 1];
3454 vec_oprnds->quick_push (vop);
3457 voprnds.release ();
3459 /* In case VF is greater than the unrolling factor needed for the SLP
3460 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3461 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3462 to replicate the vectors. */
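/* E.g. (hypothetical shapes): if two vectors {s1,s2,s3,s4} and
   {s5,s6,s7,s8} were built but NUMBER_OF_VECTORS is four, the loop
   below simply pushes the same two vectors again. */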
3463 while (number_of_vectors > vec_oprnds->length ())
3465 tree neutral_vec = NULL;
3467 if (neutral_op)
3469 if (!neutral_vec)
3470 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3472 vec_oprnds->quick_push (neutral_vec);
3474 else
3476 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3477 vec_oprnds->quick_push (vop);
3483 /* Get the vectorized definitions from SLP_NODE, which contains the
3484 corresponding vectorized def-stmts. */
3486 static void
3487 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3489 tree vec_oprnd;
3490 gimple *vec_def_stmt;
3491 unsigned int i;
3493 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3495 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3497 gcc_assert (vec_def_stmt);
3498 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3499 vec_oprnd = gimple_phi_result (vec_def_stmt);
3500 else
3501 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3502 vec_oprnds->quick_push (vec_oprnd);
3507 /* Get vectorized definitions for SLP_NODE.
3508 If the scalar definitions are loop invariants or constants, collect them and
3509 call vect_get_constant_vectors() to create vector stmts.
3510 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3511 must be stored in the corresponding child of SLP_NODE, and we call
3512 vect_get_slp_vect_defs () to retrieve them. */
3514 void
3515 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3516 vec<vec<tree> > *vec_oprnds)
3518 gimple *first_stmt;
3519 int number_of_vects = 0, i;
3520 unsigned int child_index = 0;
3521 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3522 slp_tree child = NULL;
3523 vec<tree> vec_defs;
3524 tree oprnd;
3525 bool vectorized_defs;
3527 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3528 FOR_EACH_VEC_ELT (ops, i, oprnd)
3530 /* For each operand we check if it has vectorized definitions in a child
3531 node or we need to create them (for invariants and constants). We
3532 check if the LHS of the first stmt of the next child matches OPRND.
3533 If it does, we found the correct child. Otherwise, we call
3534 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3535 to check this child node for the next operand. */
3536 vectorized_defs = false;
3537 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3539 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3541 /* We have to check both pattern and original def, if available. */
3542 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3544 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3545 gimple *related
3546 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3547 tree first_def_op;
3549 if (gimple_code (first_def) == GIMPLE_PHI)
3550 first_def_op = gimple_phi_result (first_def);
3551 else
3552 first_def_op = gimple_get_lhs (first_def);
3553 if (operand_equal_p (oprnd, first_def_op, 0)
3554 || (related
3555 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3557 /* The number of vector defs is determined by the number of
3558 vector statements in the node from which we get those
3559 statements. */
3560 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3561 vectorized_defs = true;
3562 child_index++;
3565 else
3566 child_index++;
3569 if (!vectorized_defs)
3571 if (i == 0)
3573 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3574 /* Number of vector stmts was calculated according to LHS in
3575 vect_schedule_slp_instance (), fix it by replacing LHS with
3576 RHS, if necessary. See vect_get_smallest_scalar_type () for
3577 details. */
3578 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3579 &rhs_size_unit);
3580 if (rhs_size_unit != lhs_size_unit)
3582 number_of_vects *= rhs_size_unit;
3583 number_of_vects /= lhs_size_unit;
3588 /* Allocate memory for vectorized defs. */
3589 vec_defs = vNULL;
3590 vec_defs.create (number_of_vects);
3592 /* For reduction defs we call vect_get_constant_vectors (), since we are
3593 looking for initial loop invariant values. */
3594 if (vectorized_defs)
3595 /* The defs are already vectorized. */
3596 vect_get_slp_vect_defs (child, &vec_defs);
3597 else
3598 /* Build vectors from scalar defs. */
3599 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3600 number_of_vects);
3602 vec_oprnds->quick_push (vec_defs);
3606 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3607 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3608 permute statements for the SLP node NODE of the SLP instance
3609 SLP_NODE_INSTANCE. */
3611 bool
3612 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3613 gimple_stmt_iterator *gsi, poly_uint64 vf,
3614 slp_instance slp_node_instance, bool analyze_only,
3615 unsigned *n_perms)
3617 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3618 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3619 tree mask_element_type = NULL_TREE, mask_type;
3620 int vec_index = 0;
3621 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3622 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3623 unsigned int mask_element;
3624 machine_mode mode;
3625 unsigned HOST_WIDE_INT nunits, const_vf;
3627 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3628 return false;
3630 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3632 mode = TYPE_MODE (vectype);
3634 /* At the moment, all permutations are represented using per-element
3635 indices, so we can't cope with variable vector lengths or
3636 vectorization factors. */
3637 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3638 || !vf.is_constant (&const_vf))
3639 return false;
3641 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3642 same size as the vector element being permuted. */
3643 mask_element_type = lang_hooks.types.type_for_mode
3644 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3645 mask_type = get_vectype_for_scalar_type (mask_element_type);
3646 vec_perm_builder mask (nunits, nunits, 1);
3647 mask.quick_grow (nunits);
3648 vec_perm_indices indices;
3650 /* Initialize the vect stmts of NODE to properly insert the generated
3651 stmts later. */
3652 if (! analyze_only)
3653 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3654 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3655 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3657 /* Generate permutation masks for every NODE. Number of masks for each NODE
3658 is equal to GROUP_SIZE.
3659 E.g., we have a group of three nodes with three loads from the same
3660 location in each node, and the vector size is 4. I.e., we have an
3661 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3662 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3663 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3666 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3667 The last mask is illegal since we assume two operands for permute
3668 operation, and the mask element values can't be outside that range.
3669 Hence, the last mask must be converted into {2,5,5,5}.
3670 For the first two permutations we need the first and the second input
3671 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3672 we need the second and the third vectors: {b1,c1,a2,b2} and
3673 {c2,a3,b3,c3}. */
3675 int vect_stmts_counter = 0;
3676 unsigned int index = 0;
3677 int first_vec_index = -1;
3678 int second_vec_index = -1;
3679 bool noop_p = true;
3680 *n_perms = 0;
3682 for (unsigned int j = 0; j < const_vf; j++)
3684 for (int k = 0; k < group_size; k++)
3686 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3687 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3688 vec_index = i / nunits;
3689 mask_element = i % nunits;
3690 if (vec_index == first_vec_index
3691 || first_vec_index == -1)
3693 first_vec_index = vec_index;
3695 else if (vec_index == second_vec_index
3696 || second_vec_index == -1)
3698 second_vec_index = vec_index;
3699 mask_element += nunits;
3701 else
3703 if (dump_enabled_p ())
3705 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3706 "permutation requires at "
3707 "least three vectors ");
3708 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3709 stmt, 0);
3711 gcc_assert (analyze_only);
3712 return false;
3715 gcc_assert (mask_element < 2 * nunits);
3716 if (mask_element != index)
3717 noop_p = false;
3718 mask[index++] = mask_element;
3720 if (index == nunits && !noop_p)
3722 indices.new_vector (mask, 2, nunits);
3723 if (!can_vec_perm_const_p (mode, indices))
3725 if (dump_enabled_p ())
3727 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3728 vect_location,
3729 "unsupported vect permute { ");
3730 for (i = 0; i < nunits; ++i)
3732 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3733 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3735 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3737 gcc_assert (analyze_only);
3738 return false;
              ++*n_perms;
            }

          if (index == nunits)
            {
              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}
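
/* Maps the scalar stmts of an SLP node that has already been vectorized
   to that node, so that equal nodes met later during scheduling can
   reuse its vector stmts instead of emitting them again.  */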
typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }
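
  /* GET_OR_INSERT returned a reference into the map, so this assignment
     records NODE as the leader whose vector stmts later equal nodes will
     splice in above.  */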
  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
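
  /* The pushed def-types are consulted by the transform routines when
     they handle operands; they are restored to vect_internal_def at the
     bottom of this function.  */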

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as a reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as a reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask.quick_push (1);
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask.quick_push (0);
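
      /* MASK now records, per scalar stmt, whether it uses CODE0 (0) or
         the alternate OCODE (1); below it is expanded into VEC_PERM_EXPR
         selectors that blend the two vectorized variants.  */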
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
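              /* Build the merge mask: selector MASK[K] * CONST_NUNITS + L
                 picks lane L of the CODE0 vector (first VEC_PERM_EXPR
                 operand) when MASK[K] is 0, and lane L of the OCODE vector
                 (second operand) when it is 1.  */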
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets'
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
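      /* Hand the stmt_vec_info over to the replacement assignment and swap
         the stmts in the IL; LHS keeps its uses but is now defined by the
         zeroing assignment.  */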
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
        = new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, bst_map);
      delete bst_map;
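      /* BST_MAP is local to one instance, so vector stmts are reused only
         within a single SLP instance tree, never across instances.  */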
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);
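
      /* Remove the now-dead scalar stores of this instance: walk the group
         members of the root node and delete each data-ref stmt, stopping
         at the first member that is not a data reference.  */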
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}