poly_int: current_vector_size and TARGET_AUTOVECTORIZE_VECTOR_SIZES
[official-gcc.git] / gcc / tree-vect-slp.c
blob 7d8c3522050f1f438eb3f7df5f83231c3ae44091
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
48 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
50 static void
51 vect_free_slp_tree (slp_tree node)
53 int i;
54 slp_tree child;
56 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
57 vect_free_slp_tree (child);
59 gimple *stmt;
60 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
61 /* After transform some stmts are removed and thus their vinfo is gone. */
62 if (vinfo_for_stmt (stmt))
64 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
65 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
68 SLP_TREE_CHILDREN (node).release ();
69 SLP_TREE_SCALAR_STMTS (node).release ();
70 SLP_TREE_VEC_STMTS (node).release ();
71 SLP_TREE_LOAD_PERMUTATION (node).release ();
73 free (node);
77 /* Free the memory allocated for the SLP instance. */
79 void
80 vect_free_slp_instance (slp_instance instance)
82 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
83 SLP_INSTANCE_LOADS (instance).release ();
84 free (instance);
88 /* Create an SLP node for SCALAR_STMTS. */
90 static slp_tree
91 vect_create_new_slp_node (vec<gimple *> scalar_stmts)
93 slp_tree node;
94 gimple *stmt = scalar_stmts[0];
95 unsigned int nops;
97 if (is_gimple_call (stmt))
98 nops = gimple_call_num_args (stmt);
99 else if (is_gimple_assign (stmt))
101 nops = gimple_num_ops (stmt) - 1;
102 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
103 nops++;
105 else if (gimple_code (stmt) == GIMPLE_PHI)
106 nops = 0;
107 else
108 return NULL;
110 node = XNEW (struct _slp_tree);
111 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
112 SLP_TREE_VEC_STMTS (node).create (0);
113 SLP_TREE_CHILDREN (node).create (nops);
114 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
115 SLP_TREE_TWO_OPERATORS (node) = false;
116 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
118 unsigned i;
119 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
120 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
122 return node;
126 /* This structure is used in creation of an SLP tree. Each instance
127 corresponds to the same operand in a group of scalar stmts in an SLP
128 node. */
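/* For example, for the two-stmt group { a0 + b0, a1 + b1 } two of these
   are created: oprnds_info[0] collects the def stmts of a0 and a1, and
   oprnds_info[1] those of b0 and b1.  */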
129 typedef struct _slp_oprnd_info
131 /* Def-stmts for the operands. */
132 vec<gimple *> def_stmts;
133 /* Information about the first statement, its vector def-type, type, the
134 operand itself in case it's constant, and an indication if it's a pattern
135 stmt. */
136 tree first_op_type;
137 enum vect_def_type first_dt;
138 bool first_pattern;
139 bool second_pattern;
140 } *slp_oprnd_info;
143 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
144 operand. */
145 static vec<slp_oprnd_info>
146 vect_create_oprnd_info (int nops, int group_size)
148 int i;
149 slp_oprnd_info oprnd_info;
150 vec<slp_oprnd_info> oprnds_info;
152 oprnds_info.create (nops);
153 for (i = 0; i < nops; i++)
155 oprnd_info = XNEW (struct _slp_oprnd_info);
156 oprnd_info->def_stmts.create (group_size);
157 oprnd_info->first_dt = vect_uninitialized_def;
158 oprnd_info->first_op_type = NULL_TREE;
159 oprnd_info->first_pattern = false;
160 oprnd_info->second_pattern = false;
161 oprnds_info.quick_push (oprnd_info);
164 return oprnds_info;
168 /* Free operands info. */
170 static void
171 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
173 int i;
174 slp_oprnd_info oprnd_info;
176 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
178 oprnd_info->def_stmts.release ();
179 XDELETE (oprnd_info);
182 oprnds_info.release ();
186 /* Find the place of the data-ref in STMT in the interleaving chain that starts
187 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
189 static int
190 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
192 gimple *next_stmt = first_stmt;
193 int result = 0;
195 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
196 return -1;
200 if (next_stmt == stmt)
201 return result;
202 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
203 if (next_stmt)
204 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
206 while (next_stmt);
208 return -1;
212 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
213 they are of a valid type and that they match the defs of the first stmt of
214 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
215 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
216 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
217 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
218 and code of comparison needs to be inverted. If there is any operand swap
219 in this function, *SWAP is set to non-zero value.
220 If there was a fatal error return -1; if the error could be corrected by
221 swapping operands of the parent node of this one, return 1; if everything
222 is OK return 0. */
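/* For example, given the group { a >= b ? x : y, c <= d ? z : w }, the
   second stmt matches the first after swapping its comparison operands
   (*SWAP == 1); given { a >= b ? x : y, c < d ? w : z }, it matches
   after inverting the comparison code and swapping the arms
   (*SWAP == 2).  */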
224 static int
225 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
226 gimple *stmt, unsigned stmt_num,
227 vec<slp_oprnd_info> *oprnds_info)
229 tree oprnd;
230 unsigned int i, number_of_oprnds;
231 gimple *def_stmt;
232 enum vect_def_type dt = vect_uninitialized_def;
233 bool pattern = false;
234 slp_oprnd_info oprnd_info;
235 int first_op_idx = 1;
236 bool commutative = false;
237 bool first_op_cond = false;
238 bool first = stmt_num == 0;
239 bool second = stmt_num == 1;
241 if (is_gimple_call (stmt))
243 number_of_oprnds = gimple_call_num_args (stmt);
244 first_op_idx = 3;
246 else if (is_gimple_assign (stmt))
248 enum tree_code code = gimple_assign_rhs_code (stmt);
249 number_of_oprnds = gimple_num_ops (stmt) - 1;
250 /* Swap can only be done for cond_expr if asked to, otherwise we
251 could result in different comparison code to the first stmt. */
252 if (gimple_assign_rhs_code (stmt) == COND_EXPR
253 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
255 first_op_cond = true;
256 number_of_oprnds++;
258 else
259 commutative = commutative_tree_code (code);
261 else
262 return -1;
264 bool swapped = (*swap != 0);
265 gcc_assert (!swapped || first_op_cond);
266 for (i = 0; i < number_of_oprnds; i++)
268 again:
269 if (first_op_cond)
271 /* Map indicating how operands of cond_expr should be swapped. */
272 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
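/* Row 0 is the identity mapping, row 1 (*swap == 1) swaps the two
   operands of the comparison, and row 2 (*swap == 2) swaps the two arms
   of the COND_EXPR; the comparison code itself is only adjusted later,
   when the swap is committed to the GIMPLE IL.  */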
273 int *map = maps[*swap];
275 if (i < 2)
276 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
277 else
278 oprnd = gimple_op (stmt, map[i]);
280 else
281 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
283 oprnd_info = (*oprnds_info)[i];
285 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
287 if (dump_enabled_p ())
289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
290 "Build SLP failed: can't analyze def for ");
291 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
292 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
295 return -1;
298 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
299 from the pattern. Check that all the stmts of the node are in the
300 pattern. */
301 if (def_stmt && gimple_bb (def_stmt)
302 && vect_stmt_in_region_p (vinfo, def_stmt)
303 && vinfo_for_stmt (def_stmt)
304 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
305 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
306 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
308 pattern = true;
309 if (!first && !oprnd_info->first_pattern
310 /* Allow different pattern state for the defs of the
311 first stmt in reduction chains. */
312 && (oprnd_info->first_dt != vect_reduction_def
313 || (!second && !oprnd_info->second_pattern)))
315 if (i == 0
316 && !swapped
317 && commutative)
319 swapped = true;
320 goto again;
323 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "Build SLP failed: some of the stmts"
327 " are in a pattern, and others are not ");
328 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
329 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
332 return 1;
335 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
336 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
338 if (dt == vect_unknown_def_type)
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
342 "Unsupported pattern.\n");
343 return -1;
346 switch (gimple_code (def_stmt))
348 case GIMPLE_PHI:
349 case GIMPLE_ASSIGN:
350 break;
352 default:
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
355 "unsupported defining stmt:\n");
356 return -1;
360 if (second)
361 oprnd_info->second_pattern = pattern;
363 if (first)
365 oprnd_info->first_dt = dt;
366 oprnd_info->first_pattern = pattern;
367 oprnd_info->first_op_type = TREE_TYPE (oprnd);
369 else
371 /* Not first stmt of the group, check that the def-stmt/s match
372 the def-stmt/s of the first stmt. Allow different definition
373 types for reduction chains: the first stmt must be a
374 vect_reduction_def (a phi node), and the rest
375 vect_internal_def. */
376 if (((oprnd_info->first_dt != dt
377 && !(oprnd_info->first_dt == vect_reduction_def
378 && dt == vect_internal_def)
379 && !((oprnd_info->first_dt == vect_external_def
380 || oprnd_info->first_dt == vect_constant_def)
381 && (dt == vect_external_def
382 || dt == vect_constant_def)))
383 || !types_compatible_p (oprnd_info->first_op_type,
384 TREE_TYPE (oprnd))))
386 /* Try swapping operands if we got a mismatch. */
387 if (i == 0
388 && !swapped
389 && commutative)
391 swapped = true;
392 goto again;
395 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
397 "Build SLP failed: different types\n");
399 return 1;
403 /* Check the types of the definitions. */
404 switch (dt)
406 case vect_constant_def:
407 case vect_external_def:
408 break;
410 case vect_reduction_def:
411 case vect_induction_def:
412 case vect_internal_def:
413 oprnd_info->def_stmts.quick_push (def_stmt);
414 break;
416 default:
417 /* FORNOW: Not supported. */
418 if (dump_enabled_p ())
420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
421 "Build SLP failed: illegal type of def ");
422 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
423 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
426 return -1;
430 /* Swap operands. */
431 if (swapped)
433 /* If there are already uses of this stmt in a SLP instance then
434 we've committed to the operand order and can't swap it. */
435 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
437 if (dump_enabled_p ())
439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
440 "Build SLP failed: cannot swap operands of "
441 "shared stmt ");
442 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
444 return -1;
447 if (first_op_cond)
449 tree cond = gimple_assign_rhs1 (stmt);
450 enum tree_code code = TREE_CODE (cond);
452 /* Swap. */
453 if (*swap == 1)
455 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
456 &TREE_OPERAND (cond, 1));
457 TREE_SET_CODE (cond, swap_tree_comparison (code));
459 /* Invert. */
460 else
462 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
463 gimple_assign_rhs3_ptr (stmt));
464 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
465 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
466 gcc_assert (code != ERROR_MARK);
467 TREE_SET_CODE (cond, code);
470 else
471 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
472 gimple_assign_rhs2_ptr (stmt));
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE, vect_location,
476 "swapped operands to match def types in ");
477 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
481 *swap = swapped;
482 return 0;
485 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
486 caller's attempt to find the vector type in STMT with the narrowest
487 element type. Return true if VECTYPE is nonnull and if it is valid
488 for VINFO. When returning true, update MAX_NUNITS to reflect the
489 number of units in VECTYPE. VINFO, GROUP_SIZE and MAX_NUNITS are
490 as for vect_build_slp_tree. */
492 static bool
493 vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
494 tree vectype, poly_uint64 *max_nunits)
496 if (!vectype)
498 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
501 "Build SLP failed: unsupported data-type in ");
502 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
503 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
505 /* Fatal mismatch. */
506 return false;
509 /* If populating the vector type requires unrolling then fail
510 before adjusting *max_nunits for basic-block vectorization. */
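/* For example, a group of two ints cannot fill a V4SI vector without
   unrolling the group, and there is no loop to unroll in basic-block
   vectorization.  */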
511 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
512 unsigned HOST_WIDE_INT const_nunits;
513 if (is_a <bb_vec_info> (vinfo)
514 && (!nunits.is_constant (&const_nunits)
515 || const_nunits > group_size))
517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
518 "Build SLP failed: unrolling required "
519 "in basic block SLP\n");
520 /* Fatal mismatch. */
521 return false;
524 /* In case of multiple types we need to detect the smallest type. */
525 vect_update_max_nunits (max_nunits, vectype);
526 return true;
529 /* Verify that the scalar stmts STMTS are isomorphic, do not require
530 data permutation and are not of unsupported types of operation.
531 Return true if they are, otherwise return false and indicate in
532 *MATCHES which stmts are not isomorphic to the first one. If MATCHES[0]
533 is false then this indicates the comparison could not be
534 carried out or the stmts will never be vectorized by SLP.
536 Note COND_EXPR is possibly isomorphic to another one after swapping its
537 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
538 the first stmt by swapping the two operands of comparison; set SWAP[i]
539 to 2 if stmt I is isomorphic to the first stmt by inverting the code
540 of comparison. Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
541 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
543 static bool
544 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
545 vec<gimple *> stmts, unsigned int group_size,
546 unsigned nops, poly_uint64 *max_nunits,
547 bool *matches, bool *two_operators)
549 unsigned int i;
550 gimple *first_stmt = stmts[0], *stmt = stmts[0];
551 enum tree_code first_stmt_code = ERROR_MARK;
552 enum tree_code alt_stmt_code = ERROR_MARK;
553 enum tree_code rhs_code = ERROR_MARK;
554 enum tree_code first_cond_code = ERROR_MARK;
555 tree lhs;
556 bool need_same_oprnds = false;
557 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
558 optab optab;
559 int icode;
560 machine_mode optab_op2_mode;
561 machine_mode vec_mode;
562 HOST_WIDE_INT dummy;
563 gimple *first_load = NULL, *prev_first_load = NULL;
565 /* For every stmt in NODE find its def stmt/s. */
566 FOR_EACH_VEC_ELT (stmts, i, stmt)
568 swap[i] = 0;
569 matches[i] = false;
571 if (dump_enabled_p ())
573 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
574 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
577 /* Fail to vectorize statements marked as unvectorizable. */
578 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
580 if (dump_enabled_p ())
582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
583 "Build SLP failed: unvectorizable statement ");
584 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
586 /* Fatal mismatch. */
587 matches[0] = false;
588 return false;
591 lhs = gimple_get_lhs (stmt);
592 if (lhs == NULL_TREE)
594 if (dump_enabled_p ())
596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
597 "Build SLP failed: not GIMPLE_ASSIGN nor "
598 "GIMPLE_CALL ");
599 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
601 /* Fatal mismatch. */
602 matches[0] = false;
603 return false;
606 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
607 vectype = get_vectype_for_scalar_type (scalar_type);
608 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
609 max_nunits))
611 /* Fatal mismatch. */
612 matches[0] = false;
613 return false;
616 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
618 rhs_code = CALL_EXPR;
619 if (gimple_call_internal_p (call_stmt)
620 || gimple_call_tail_p (call_stmt)
621 || gimple_call_noreturn_p (call_stmt)
622 || !gimple_call_nothrow_p (call_stmt)
623 || gimple_call_chain (call_stmt))
625 if (dump_enabled_p ())
627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
628 "Build SLP failed: unsupported call type ");
629 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
630 call_stmt, 0);
632 /* Fatal mismatch. */
633 matches[0] = false;
634 return false;
637 else
638 rhs_code = gimple_assign_rhs_code (stmt);
640 /* Check the operation. */
641 if (i == 0)
643 first_stmt_code = rhs_code;
645 /* Shift arguments should be equal in all the packed stmts for a
646 vector shift with scalar shift operand. */
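/* E.g. { a << 2, b << 2 } can use a vector/scalar shift, but
   { a << 2, b << 3 } requires a vector/vector shift in which each lane
   carries its own shift amount.  */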
647 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
648 || rhs_code == LROTATE_EXPR
649 || rhs_code == RROTATE_EXPR)
651 vec_mode = TYPE_MODE (vectype);
653 /* First see if we have a vector/vector shift. */
654 optab = optab_for_tree_code (rhs_code, vectype,
655 optab_vector);
657 if (!optab
658 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
660 /* No vector/vector shift, try for a vector/scalar shift. */
661 optab = optab_for_tree_code (rhs_code, vectype,
662 optab_scalar);
664 if (!optab)
666 if (dump_enabled_p ())
667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
668 "Build SLP failed: no optab.\n");
669 /* Fatal mismatch. */
670 matches[0] = false;
671 return false;
673 icode = (int) optab_handler (optab, vec_mode);
674 if (icode == CODE_FOR_nothing)
676 if (dump_enabled_p ())
677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
678 "Build SLP failed: "
679 "op not supported by target.\n");
680 /* Fatal mismatch. */
681 matches[0] = false;
682 return false;
684 optab_op2_mode = insn_data[icode].operand[2].mode;
685 if (!VECTOR_MODE_P (optab_op2_mode))
687 need_same_oprnds = true;
688 first_op1 = gimple_assign_rhs2 (stmt);
692 else if (rhs_code == WIDEN_LSHIFT_EXPR)
694 need_same_oprnds = true;
695 first_op1 = gimple_assign_rhs2 (stmt);
698 else
700 if (first_stmt_code != rhs_code
701 && alt_stmt_code == ERROR_MARK)
702 alt_stmt_code = rhs_code;
703 if (first_stmt_code != rhs_code
704 && (first_stmt_code != IMAGPART_EXPR
705 || rhs_code != REALPART_EXPR)
706 && (first_stmt_code != REALPART_EXPR
707 || rhs_code != IMAGPART_EXPR)
708 /* Handle mismatches in plus/minus by computing both
709 and merging the results. */
710 && !((first_stmt_code == PLUS_EXPR
711 || first_stmt_code == MINUS_EXPR)
712 && (alt_stmt_code == PLUS_EXPR
713 || alt_stmt_code == MINUS_EXPR)
714 && rhs_code == alt_stmt_code)
715 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
716 && (first_stmt_code == ARRAY_REF
717 || first_stmt_code == BIT_FIELD_REF
718 || first_stmt_code == INDIRECT_REF
719 || first_stmt_code == COMPONENT_REF
720 || first_stmt_code == MEM_REF)))
722 if (dump_enabled_p ())
724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
725 "Build SLP failed: different operation "
726 "in stmt ");
727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
729 "original stmt ");
730 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
731 first_stmt, 0);
733 /* Mismatch. */
734 continue;
737 if (need_same_oprnds
738 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
740 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "Build SLP failed: different shift "
744 "arguments in ");
745 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
747 /* Mismatch. */
748 continue;
751 if (rhs_code == CALL_EXPR)
753 gimple *first_stmt = stmts[0];
754 if (gimple_call_num_args (stmt) != nops
755 || !operand_equal_p (gimple_call_fn (first_stmt),
756 gimple_call_fn (stmt), 0)
757 || gimple_call_fntype (first_stmt)
758 != gimple_call_fntype (stmt))
760 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
763 "Build SLP failed: different calls in ");
764 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
765 stmt, 0);
767 /* Mismatch. */
768 continue;
773 /* Grouped store or load. */
774 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
776 if (REFERENCE_CLASS_P (lhs))
778 /* Store. */
781 else
783 /* Load. */
784 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
785 if (prev_first_load)
787 /* Check that there are no loads from different interleaving
788 chains in the same node. */
789 if (prev_first_load != first_load)
791 if (dump_enabled_p ())
793 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
794 vect_location,
795 "Build SLP failed: different "
796 "interleaving chains in one node ");
797 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
798 stmt, 0);
800 /* Mismatch. */
801 continue;
804 else
805 prev_first_load = first_load;
807 } /* Grouped access. */
808 else
810 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
812 /* Not grouped load. */
813 if (dump_enabled_p ())
815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
816 "Build SLP failed: not grouped load ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
821 /* FORNOW: Non-grouped loads are not supported. */
821 /* Fatal mismatch. */
822 matches[0] = false;
823 return false;
826 /* Not a memory operation. */
827 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
828 && TREE_CODE_CLASS (rhs_code) != tcc_unary
829 && TREE_CODE_CLASS (rhs_code) != tcc_expression
830 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
831 && rhs_code != CALL_EXPR)
833 if (dump_enabled_p ())
835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
836 "Build SLP failed: operation");
837 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
838 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
840 /* Fatal mismatch. */
841 matches[0] = false;
842 return false;
845 if (rhs_code == COND_EXPR)
847 tree cond_expr = gimple_assign_rhs1 (stmt);
848 enum tree_code cond_code = TREE_CODE (cond_expr);
849 enum tree_code swap_code = ERROR_MARK;
850 enum tree_code invert_code = ERROR_MARK;
852 if (i == 0)
853 first_cond_code = TREE_CODE (cond_expr);
854 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
856 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
857 swap_code = swap_tree_comparison (cond_code);
858 invert_code = invert_tree_comparison (cond_code, honor_nans);
861 if (first_cond_code == cond_code)
863 /* Isomorphic can be achieved by swapping. */
864 else if (first_cond_code == swap_code)
865 swap[i] = 1;
866 /* Isomorphic can be achieved by inverting. */
867 else if (first_cond_code == invert_code)
868 swap[i] = 2;
869 else
871 if (dump_enabled_p ())
873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
874 "Build SLP failed: different"
875 " operation");
876 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
877 stmt, 0);
879 /* Mismatch. */
880 continue;
885 matches[i] = true;
888 for (i = 0; i < group_size; ++i)
889 if (!matches[i])
890 return false;
892 /* If we allowed a two-operation SLP node verify the target can cope
893 with the permute we are going to use. */
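/* E.g. for { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } both a + b and a - b
   are computed as full vectors and blended with the permute
   { 0, 5, 2, 7 }, taking the PLUS lanes from the first input and the
   MINUS lanes from the second.  */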
894 if (alt_stmt_code != ERROR_MARK
895 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
897 unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
898 vec_perm_builder sel (count, count, 1);
899 for (i = 0; i < count; ++i)
901 unsigned int elt = i;
902 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
903 elt += count;
904 sel.quick_push (elt);
906 vec_perm_indices indices (sel, 2, count);
907 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
909 for (i = 0; i < group_size; ++i)
910 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
912 matches[i] = false;
913 if (dump_enabled_p ())
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
916 "Build SLP failed: different operation "
917 "in stmt ");
918 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
919 stmts[i], 0);
920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
921 "original stmt ");
922 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
923 first_stmt, 0);
926 return false;
928 *two_operators = true;
931 return true;
934 /* Traits for the hash_set to record failed SLP builds for a stmt set.
935 Note we never remove apart from at destruction time so we do not
936 need a special value for deleted that differs from empty. */
937 struct bst_traits
939 typedef vec <gimple *> value_type;
940 typedef vec <gimple *> compare_type;
941 static inline hashval_t hash (value_type);
942 static inline bool equal (value_type existing, value_type candidate);
943 static inline bool is_empty (value_type x) { return !x.exists (); }
944 static inline bool is_deleted (value_type x) { return !x.exists (); }
945 static inline void mark_empty (value_type &x) { x.release (); }
946 static inline void mark_deleted (value_type &x) { x.release (); }
947 static inline void remove (value_type &x) { x.release (); }
949 inline hashval_t
950 bst_traits::hash (value_type x)
952 inchash::hash h;
953 for (unsigned i = 0; i < x.length (); ++i)
954 h.add_int (gimple_uid (x[i]));
955 return h.end ();
957 inline bool
958 bst_traits::equal (value_type existing, value_type candidate)
960 if (existing.length () != candidate.length ())
961 return false;
962 for (unsigned i = 0; i < existing.length (); ++i)
963 if (existing[i] != candidate[i])
964 return false;
965 return true;
968 typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
969 static scalar_stmts_set_t *bst_fail;
971 static slp_tree
972 vect_build_slp_tree_2 (vec_info *vinfo,
973 vec<gimple *> stmts, unsigned int group_size,
974 poly_uint64 *max_nunits,
975 vec<slp_tree> *loads,
976 bool *matches, unsigned *npermutes, unsigned *tree_size,
977 unsigned max_tree_size);
979 static slp_tree
980 vect_build_slp_tree (vec_info *vinfo,
981 vec<gimple *> stmts, unsigned int group_size,
982 poly_uint64 *max_nunits, vec<slp_tree> *loads,
983 bool *matches, unsigned *npermutes, unsigned *tree_size,
984 unsigned max_tree_size)
986 if (bst_fail->contains (stmts))
987 return NULL;
988 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
989 loads, matches, npermutes, tree_size,
990 max_tree_size);
991 /* When the SLP build fails for stmts, record this; otherwise the SLP
992 build can be exponential in time when we allow constructing parts
993 from scalars, see PR81723. */
994 if (! res)
996 vec <gimple *> x;
997 x.create (stmts.length ());
998 x.splice (stmts);
999 bst_fail->add (x);
1001 return res;
1004 /* Recursively build an SLP tree starting from NODE.
1005 Fail (and return NULL) if def-stmts are not isomorphic, require data
1006 permutation or are of unsupported types of operation, recording in
1007 MATCHES which stmts failed to match the first one. Otherwise return
1008 the built SLP tree. */
1011 static slp_tree
1012 vect_build_slp_tree_2 (vec_info *vinfo,
1013 vec<gimple *> stmts, unsigned int group_size,
1014 poly_uint64 *max_nunits,
1015 vec<slp_tree> *loads,
1016 bool *matches, unsigned *npermutes, unsigned *tree_size,
1017 unsigned max_tree_size)
1019 unsigned nops, i, this_tree_size = 0;
1020 poly_uint64 this_max_nunits = *max_nunits;
1021 gimple *stmt;
1022 slp_tree node;
1024 matches[0] = false;
1026 stmt = stmts[0];
1027 if (is_gimple_call (stmt))
1028 nops = gimple_call_num_args (stmt);
1029 else if (is_gimple_assign (stmt))
1031 nops = gimple_num_ops (stmt) - 1;
1032 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1033 nops++;
1035 else if (gimple_code (stmt) == GIMPLE_PHI)
1036 nops = 0;
1037 else
1038 return NULL;
1040 /* If the SLP node is a PHI (induction or reduction), terminate
1041 the recursion. */
1042 if (gimple_code (stmt) == GIMPLE_PHI)
1044 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1045 tree vectype = get_vectype_for_scalar_type (scalar_type);
1046 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
1047 max_nunits))
1048 return NULL;
1050 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
1051 /* Induction from different IVs is not supported. */
1052 if (def_type == vect_induction_def)
1054 FOR_EACH_VEC_ELT (stmts, i, stmt)
1055 if (stmt != stmts[0])
1056 return NULL;
1058 else
1060 /* Else def types have to match. */
1061 FOR_EACH_VEC_ELT (stmts, i, stmt)
1063 /* But for reduction chains only check on the first stmt. */
1064 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1065 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
1066 continue;
1067 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
1068 return NULL;
1071 node = vect_create_new_slp_node (stmts);
1072 return node;
1076 bool two_operators = false;
1077 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1078 if (!vect_build_slp_tree_1 (vinfo, swap,
1079 stmts, group_size, nops,
1080 &this_max_nunits, matches, &two_operators))
1081 return NULL;
1083 /* If the SLP node is a load, terminate the recursion. */
1084 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
1085 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1087 *max_nunits = this_max_nunits;
1088 node = vect_create_new_slp_node (stmts);
1089 loads->safe_push (node);
1090 return node;
1093 /* Get at the operands, verifying they are compatible. */
1094 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1095 slp_oprnd_info oprnd_info;
1096 FOR_EACH_VEC_ELT (stmts, i, stmt)
1098 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1099 stmt, i, &oprnds_info);
1100 if (res != 0)
1101 matches[(res == -1) ? 0 : i] = false;
1102 if (!matches[0])
1103 break;
1105 for (i = 0; i < group_size; ++i)
1106 if (!matches[i])
1108 vect_free_oprnd_info (oprnds_info);
1109 return NULL;
1112 auto_vec<slp_tree, 4> children;
1113 auto_vec<slp_tree> this_loads;
1115 stmt = stmts[0];
1117 if (tree_size)
1118 max_tree_size -= *tree_size;
1120 /* Create SLP_TREE nodes for the definition node/s. */
1121 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1123 slp_tree child;
1124 unsigned old_nloads = this_loads.length ();
1125 unsigned old_tree_size = this_tree_size;
1126 unsigned int j;
1128 if (oprnd_info->first_dt != vect_internal_def
1129 && oprnd_info->first_dt != vect_reduction_def
1130 && oprnd_info->first_dt != vect_induction_def)
1131 continue;
1133 if (++this_tree_size > max_tree_size)
1135 FOR_EACH_VEC_ELT (children, j, child)
1136 vect_free_slp_tree (child);
1137 vect_free_oprnd_info (oprnds_info);
1138 return NULL;
1141 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1142 group_size, &this_max_nunits,
1143 &this_loads, matches, npermutes,
1144 &this_tree_size,
1145 max_tree_size)) != NULL)
1147 /* If we have all children of child built up from scalars then just
1148 throw that away and build this node up from scalars instead. */
1149 if (!SLP_TREE_CHILDREN (child).is_empty ()
1150 /* ??? Rejecting patterns this way doesn't work. We'd have to
1151 do extra work to cancel the pattern so the uses see the
1152 scalar version. */
1153 && !is_pattern_stmt_p
1154 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1156 slp_tree grandchild;
1158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1159 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1160 break;
1161 if (!grandchild)
1163 /* Roll back. */
1164 this_loads.truncate (old_nloads);
1165 this_tree_size = old_tree_size;
1166 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1167 vect_free_slp_tree (grandchild);
1168 SLP_TREE_CHILDREN (child).truncate (0);
1170 dump_printf_loc (MSG_NOTE, vect_location,
1171 "Building parent vector operands from "
1172 "scalars instead\n");
1173 oprnd_info->def_stmts = vNULL;
1174 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1175 children.safe_push (child);
1176 continue;
1180 oprnd_info->def_stmts = vNULL;
1181 children.safe_push (child);
1182 continue;
1185 /* If the SLP build failed fatally and we analyze a basic-block
1186 simply treat nodes we fail to build as externally defined
1187 (and thus build vectors from the scalar defs).
1188 The cost model will reject outright expensive cases.
1189 ??? This doesn't treat cases where permutation ultimately
1190 fails (or we don't try permutation below). Ideally we'd
1191 even compute a permutation that will end up with the maximum
1192 SLP tree size... */
1193 if (is_a <bb_vec_info> (vinfo)
1194 && !matches[0]
1195 /* ??? Rejecting patterns this way doesn't work. We'd have to
1196 do extra work to cancel the pattern so the uses see the
1197 scalar version. */
1198 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1200 dump_printf_loc (MSG_NOTE, vect_location,
1201 "Building vector operands from scalars\n");
1202 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1203 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1204 children.safe_push (child);
1205 oprnd_info->def_stmts = vNULL;
1206 continue;
1209 /* If the SLP build for operand zero failed and operand zero
1210 and one can be commuted, try that for the scalar stmts
1211 that failed the match. */
1212 if (i == 0
1213 /* A first scalar stmt mismatch signals a fatal mismatch. */
1214 && matches[0]
1215 /* ??? For COND_EXPRs we can swap the comparison operands
1216 as well as the arms under some constraints. */
1217 && nops == 2
1218 && oprnds_info[1]->first_dt == vect_internal_def
1219 && is_gimple_assign (stmt)
1220 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1221 && ! two_operators
1222 /* Do so only if the number of unsuccessful permutes was no more
1223 than a cut-off, as re-trying the recursive match on possibly
1224 each level of the tree would expose exponential behavior. */
1226 && *npermutes < 4)
1228 /* Verify if we can safely swap or if we committed to a specific
1229 operand order already. */
1230 for (j = 0; j < group_size; ++j)
1231 if (!matches[j]
1232 && (swap[j] != 0
1233 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1238 "Build SLP failed: cannot swap operands "
1239 "of shared stmt ");
1240 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1241 stmts[j], 0);
1243 goto fail;
1246 /* Swap mismatched definition stmts. */
1247 dump_printf_loc (MSG_NOTE, vect_location,
1248 "Re-trying with swapped operands of stmts ");
1249 for (j = 0; j < group_size; ++j)
1250 if (!matches[j])
1252 std::swap (oprnds_info[0]->def_stmts[j],
1253 oprnds_info[1]->def_stmts[j]);
1254 dump_printf (MSG_NOTE, "%d ", j);
1256 dump_printf (MSG_NOTE, "\n");
1257 /* And try again with scratch 'matches' ... */
1258 bool *tem = XALLOCAVEC (bool, group_size);
1259 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1260 group_size, &this_max_nunits,
1261 &this_loads, tem, npermutes,
1262 &this_tree_size,
1263 max_tree_size)) != NULL)
1265 /* ... so if successful we can apply the operand swapping
1266 to the GIMPLE IL. This is necessary because for example
1267 vect_get_slp_defs uses operand indexes and thus expects
1268 canonical operand order. This is also necessary even
1269 if we end up building the operand from scalars as
1270 we'll continue to process swapped operand two. */
1271 for (j = 0; j < group_size; ++j)
1273 gimple *stmt = stmts[j];
1274 gimple_set_plf (stmt, GF_PLF_1, false);
1276 for (j = 0; j < group_size; ++j)
1278 gimple *stmt = stmts[j];
1279 if (!matches[j])
1281 /* Avoid swapping operands twice. */
1282 if (gimple_plf (stmt, GF_PLF_1))
1283 continue;
1284 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1285 gimple_assign_rhs2_ptr (stmt));
1286 gimple_set_plf (stmt, GF_PLF_1, true);
1289 /* Verify we swap all duplicates or none. */
1290 if (flag_checking)
1291 for (j = 0; j < group_size; ++j)
1293 gimple *stmt = stmts[j];
1294 gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
1297 /* If we have all children of child built up from scalars then
1298 just throw that away and build this node up from scalars instead. */
1299 if (!SLP_TREE_CHILDREN (child).is_empty ()
1300 /* ??? Rejecting patterns this way doesn't work. We'd have
1301 to do extra work to cancel the pattern so the uses see the
1302 scalar version. */
1303 && !is_pattern_stmt_p
1304 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1306 unsigned int j;
1307 slp_tree grandchild;
1309 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1310 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1311 break;
1312 if (!grandchild)
1314 /* Roll back. */
1315 this_loads.truncate (old_nloads);
1316 this_tree_size = old_tree_size;
1317 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1318 vect_free_slp_tree (grandchild);
1319 SLP_TREE_CHILDREN (child).truncate (0);
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "Building parent vector operands from "
1323 "scalars instead\n");
1324 oprnd_info->def_stmts = vNULL;
1325 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1326 children.safe_push (child);
1327 continue;
1331 oprnd_info->def_stmts = vNULL;
1332 children.safe_push (child);
1333 continue;
1336 ++*npermutes;
1339 fail:
1340 gcc_assert (child == NULL);
1341 FOR_EACH_VEC_ELT (children, j, child)
1342 vect_free_slp_tree (child);
1343 vect_free_oprnd_info (oprnds_info);
1344 return NULL;
1347 vect_free_oprnd_info (oprnds_info);
1349 if (tree_size)
1350 *tree_size += this_tree_size;
1351 *max_nunits = this_max_nunits;
1352 loads->safe_splice (this_loads);
1354 node = vect_create_new_slp_node (stmts);
1355 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1356 SLP_TREE_CHILDREN (node).splice (children);
1357 return node;
1360 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1362 static void
1363 vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
1365 int i;
1366 gimple *stmt;
1367 slp_tree child;
1369 dump_printf_loc (dump_kind, loc, "node%s\n",
1370 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1371 ? " (external)" : "");
1372 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1374 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1375 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1377 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1378 vect_print_slp_tree (dump_kind, loc, child);
1382 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1383 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1384 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1385 stmts in NODE are to be marked. */
1387 static void
1388 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1390 int i;
1391 gimple *stmt;
1392 slp_tree child;
1394 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1395 return;
1397 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1398 if (j < 0 || i == j)
1399 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1401 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1402 vect_mark_slp_stmts (child, mark, j);
1406 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1408 static void
1409 vect_mark_slp_stmts_relevant (slp_tree node)
1411 int i;
1412 gimple *stmt;
1413 stmt_vec_info stmt_info;
1414 slp_tree child;
1416 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1417 return;
1419 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1421 stmt_info = vinfo_for_stmt (stmt);
1422 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1423 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1424 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1427 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1428 vect_mark_slp_stmts_relevant (child);
1432 /* Rearrange the statements of NODE according to PERMUTATION. */
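/* E.g. with PERMUTATION { 1, 0, 3, 2 } the stmt at index 0 moves to
   slot 1 and vice versa, since the loop below stores stmt I at
   tmp_stmts[permutation[I]].  */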
1434 static void
1435 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1436 vec<unsigned> permutation)
1438 gimple *stmt;
1439 vec<gimple *> tmp_stmts;
1440 unsigned int i;
1441 slp_tree child;
1443 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1444 vect_slp_rearrange_stmts (child, group_size, permutation);
1446 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1447 tmp_stmts.create (group_size);
1448 tmp_stmts.quick_grow_cleared (group_size);
1450 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1451 tmp_stmts[permutation[i]] = stmt;
1453 SLP_TREE_SCALAR_STMTS (node).release ();
1454 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1458 /* Attempt to reorder stmts in a reduction chain so that we don't
1459 require any load permutation. Return true if that was possible,
1460 otherwise return false. */
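/* E.g. if the loads of a reduction come back as { a[1], a[0], a[3],
   a[2] } we can reorder the scalar stmts of every node to that order
   instead of emitting lane permutes, because the order in which
   reduction elements are combined does not matter.  */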
1462 static bool
1463 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1465 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1466 unsigned int i, j;
1467 unsigned int lidx;
1468 slp_tree node, load;
1470 /* Compare all the permutation sequences to the first one. We know
1471 that at least one load is permuted. */
1472 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1473 if (!node->load_permutation.exists ())
1474 return false;
1475 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1477 if (!load->load_permutation.exists ())
1478 return false;
1479 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1480 if (lidx != node->load_permutation[j])
1481 return false;
1484 /* Check that the loads in the first sequence are different and there
1485 are no gaps between them. */
1486 auto_sbitmap load_index (group_size);
1487 bitmap_clear (load_index);
1488 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1490 if (lidx >= group_size)
1491 return false;
1492 if (bitmap_bit_p (load_index, lidx))
1493 return false;
1495 bitmap_set_bit (load_index, lidx);
1497 for (i = 0; i < group_size; i++)
1498 if (!bitmap_bit_p (load_index, i))
1499 return false;
1501 /* This permutation is valid for reduction. Since the order of the
1502 statements in the nodes is not important unless they are memory
1503 accesses, we can rearrange the statements in all the nodes
1504 according to the order of the loads. */
1505 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1506 node->load_permutation);
1508 /* We are done, no actual permutations need to be generated. */
1509 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1510 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1512 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1513 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
1514 /* But we have to keep those permutations that are required because
1515 of handling of gaps. */
1516 if (known_eq (unrolling_factor, 1U)
1517 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1518 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
1519 SLP_TREE_LOAD_PERMUTATION (node).release ();
1520 else
1521 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1522 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1525 return true;
1528 /* Check if the required load permutations in the SLP instance
1529 SLP_INSTN are supported. */
1531 static bool
1532 vect_supported_load_permutation_p (slp_instance slp_instn)
1534 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1535 unsigned int i, j, k, next;
1536 slp_tree node;
1537 gimple *stmt, *load, *next_load;
1539 if (dump_enabled_p ())
1541 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1542 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1543 if (node->load_permutation.exists ())
1544 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1545 dump_printf (MSG_NOTE, "%d ", next);
1546 else
1547 for (k = 0; k < group_size; ++k)
1548 dump_printf (MSG_NOTE, "%d ", k);
1549 dump_printf (MSG_NOTE, "\n");
1552 /* In case of reduction every load permutation is allowed, since the order
1553 of the reduction statements is not important (as opposed to the case of
1554 grouped stores). The only condition we need to check is that all the
1555 load nodes are of the same size and have the same permutation (and then
1556 rearrange all the nodes of the SLP instance according to this
1557 permutation). */
1559 /* Check that all the load nodes are of the same size. */
1560 /* ??? Can't we assert this? */
1561 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1562 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1563 return false;
1565 node = SLP_INSTANCE_TREE (slp_instn);
1566 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1568 /* Reduction (there are no data-refs in the root).
1569 In reduction chain the order of the loads is not important. */
1570 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1571 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1572 vect_attempt_slp_rearrange_stmts (slp_instn);
1574 /* In basic block vectorization we allow any subchain of an interleaving
1575 chain.
1576 FORNOW: not supported in loop SLP because of realignment complications. */
1577 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1579 /* Check whether the loads in an instance form a subchain and thus
1580 no permutation is necessary. */
1581 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1583 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1584 continue;
1585 bool subchain_p = true;
1586 next_load = NULL;
1587 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1589 if (j != 0
1590 && (next_load != load
1591 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
1593 subchain_p = false;
1594 break;
1596 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1598 if (subchain_p)
1599 SLP_TREE_LOAD_PERMUTATION (node).release ();
1600 else
1602 stmt_vec_info group_info
1603 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1604 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
1605 unsigned nunits
1606 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1607 unsigned k, maxk = 0;
1608 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1609 if (k > maxk)
1610 maxk = k;
1611 /* In BB vectorization we may not actually use a loaded vector
1612 accessing elements in excess of GROUP_SIZE. */
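/* E.g. with GROUP_SIZE 6 and nunits 4 only the first full vector
   (elements 0-3) is guaranteed to be loaded, so a permutation index of
   4 or 5 would refer to a vector we may never emit.  */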
1613 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
1615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1616 "BB vectorization with gaps at the end of "
1617 "a load is not supported\n");
1618 return false;
1621 /* Verify the permutation can be generated. */
1622 vec<tree> tem;
1623 unsigned n_perms;
1624 if (!vect_transform_slp_perm_load (node, tem, NULL,
1625 1, slp_instn, true, &n_perms))
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1628 vect_location,
1629 "unsupported load permutation\n");
1630 return false;
1634 return true;
1637 /* For loop vectorization verify we can generate the permutation. Be
1638 conservative about the vectorization factor; there are permutations
1639 that will use three vector inputs only starting from a specific factor
1640 and the vectorization factor is not yet final.
1641 ??? The SLP instance unrolling factor might not be the maximum one. */
1642 unsigned n_perms;
1643 poly_uint64 test_vf
1644 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1645 LOOP_VINFO_VECT_FACTOR
1646 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
1647 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1648 if (node->load_permutation.exists ()
1649 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1650 slp_instn, true, &n_perms))
1651 return false;
1653 return true;
1657 /* Find the last scalar stmt of the SLP group rooted at NODE. */
1659 gimple *
1660 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1662 gimple *last = NULL, *stmt;
1664 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1666 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1667 if (is_pattern_stmt_p (stmt_vinfo))
1668 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1669 else
1670 last = get_later_stmt (stmt, last);
1673 return last;
1676 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1678 static void
1679 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1680 stmt_vector_for_cost *prologue_cost_vec,
1681 stmt_vector_for_cost *body_cost_vec,
1682 unsigned ncopies_for_cost,
1683 scalar_stmts_set_t* visited)
1685 unsigned i, j;
1686 slp_tree child;
1687 gimple *stmt;
1688 stmt_vec_info stmt_info;
1689 tree lhs;
1691 /* If we already costed the exact same set of scalar stmts we're done.
1692 We share the generated vector stmts for those. */
1693 if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
1694 return;
1696 visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
1698 /* Recurse down the SLP tree. */
1699 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1700 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1701 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1702 body_cost_vec, ncopies_for_cost, visited);
1704 /* Look at the first scalar stmt to determine the cost. */
1705 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1706 stmt_info = vinfo_for_stmt (stmt);
1707 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1709 vect_memory_access_type memory_access_type
1710 = (STMT_VINFO_STRIDED_P (stmt_info)
1711 ? VMAT_STRIDED_SLP
1712 : VMAT_CONTIGUOUS);
1713 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1714 vect_model_store_cost (stmt_info, ncopies_for_cost,
1715 memory_access_type, vect_uninitialized_def,
1716 node, prologue_cost_vec, body_cost_vec);
1717 else
1719 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1720 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1722 /* If the load is permuted then the alignment is determined by
1723 the first group element not by the first scalar stmt DR. */
1724 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1725 stmt_info = vinfo_for_stmt (stmt);
1726 /* Record the cost for the permutation. */
1727 unsigned n_perms;
1728 vect_transform_slp_perm_load (node, vNULL, NULL,
1729 ncopies_for_cost, instance, true,
1730 &n_perms);
1731 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
1732 stmt_info, 0, vect_body);
1733 unsigned assumed_nunits
1734 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
1735 /* And adjust the number of loads performed. This handles
1736 redundancies as well as loads that are later dead. */
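/* E.g. with GROUP_SIZE 8 and nunits 4, a permutation that uses only
   elements 0 and 1 leaves the second vector load entirely unused, so
   the loop below counts a single load.  */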
1737 auto_sbitmap perm (GROUP_SIZE (stmt_info));
1738 bitmap_clear (perm);
1739 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1740 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1741 ncopies_for_cost = 0;
1742 bool load_seen = false;
1743 for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1745 if (i % assumed_nunits == 0)
1747 if (load_seen)
1748 ncopies_for_cost++;
1749 load_seen = false;
1751 if (bitmap_bit_p (perm, i))
1752 load_seen = true;
1754 if (load_seen)
1755 ncopies_for_cost++;
1756 gcc_assert (ncopies_for_cost
1757 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1758 + assumed_nunits - 1) / assumed_nunits);
1759 poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1760 ncopies_for_cost *= estimated_poly_value (uf);
1762 /* Record the cost for the vector loads. */
1763 vect_model_load_cost (stmt_info, ncopies_for_cost,
1764 memory_access_type, node, prologue_cost_vec,
1765 body_cost_vec);
1766 return;
1769 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
1771 /* ncopies_for_cost is the number of IVs we generate. */
1772 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1773 stmt_info, 0, vect_body);
1775 /* Prologue cost for the initial values and step vector. */
1776 record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
1777 CONSTANT_CLASS_P
1778 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1779 (stmt_info))
1780 ? vector_load : vec_construct,
1781 stmt_info, 0, vect_prologue);
1782 record_stmt_cost (prologue_cost_vec, 1,
1783 CONSTANT_CLASS_P
1784 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
1785 ? vector_load : vec_construct,
1786 stmt_info, 0, vect_prologue);
1788 /* ??? No easy way to get at the actual number of vector stmts
1789 to be generated and thus the derived IVs. */
1791 else
1793 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1794 stmt_info, 0, vect_body);
1795 if (SLP_TREE_TWO_OPERATORS (node))
1797 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1798 stmt_info, 0, vect_body);
1799 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1800 stmt_info, 0, vect_body);
1804 /* Push SLP node def-type to stmts. */
1805 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1806 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1807 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1808 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1810 /* Scan operands and account for prologue cost of constants/externals.
1811 ??? This over-estimates cost for multiple uses and should be
1812 re-engineered. */
1813 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1814 lhs = gimple_get_lhs (stmt);
1815 for (i = 0; i < gimple_num_ops (stmt); ++i)
1817 tree op = gimple_op (stmt, i);
1818 gimple *def_stmt;
1819 enum vect_def_type dt;
1820 if (!op || op == lhs)
1821 continue;
1822 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1824 /* Without looking at the actual initializer a vector of
1825 constants can be implemented as load from the constant pool.
1826 ??? We need to pass down stmt_info for a vector type
1827 even if it points to the wrong stmt. */
1828 if (dt == vect_constant_def)
1829 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1830 stmt_info, 0, vect_prologue);
1831 else if (dt == vect_external_def)
1832 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1833 stmt_info, 0, vect_prologue);
1837 /* Restore stmt def-types. */
1838 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1839 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1840 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1841 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1844 /* Compute the cost for the SLP instance INSTANCE. */
1846 static void
1847 vect_analyze_slp_cost (slp_instance instance, void *data)
1849 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1850 unsigned ncopies_for_cost;
1851 stmt_info_for_cost *si;
1852 unsigned i;
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_NOTE, vect_location,
1856 "=== vect_analyze_slp_cost ===\n");
1858 /* Calculate the number of vector stmts to create based on the unrolling
1859 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1860 GROUP_SIZE / NUNITS otherwise). */
1861 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1862 slp_tree node = SLP_INSTANCE_TREE (instance);
1863 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1864 /* Get the estimated vectorization factor, which is always one for
1865 basic-block vectorization. */
1866 unsigned int assumed_vf;
1867 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1868 assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
1869 else
1870 assumed_vf = 1;
1871 /* For reductions look at a reduction operand in case the reduction
1872 operation is widening like DOT_PROD or SAD. */
1873 tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
1874 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1876 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1877 switch (gimple_assign_rhs_code (stmt))
1879 case DOT_PROD_EXPR:
1880 case SAD_EXPR:
1881 vectype_for_cost = get_vectype_for_scalar_type
1882 (TREE_TYPE (gimple_assign_rhs1 (stmt)));
1883 break;
1884 default:;
1887 unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
1888 ncopies_for_cost = (least_common_multiple (assumed_nunits,
1889 group_size * assumed_vf)
1890 / assumed_nunits);
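/* A worked example with illustrative values only: for an assumed_nunits
of 8, a group_size of 3 and an assumed_vf of 4 this computes
least_common_multiple (8, 3 * 4) / 8 = 24 / 8 = 3 copies. */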
1892 prologue_cost_vec.create (10);
1893 body_cost_vec.create (10);
1894 scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
1895 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1896 &prologue_cost_vec, &body_cost_vec,
1897 ncopies_for_cost, visited);
1898 delete visited;
1900 /* Record the prologue costs, which were delayed until we were
1901 sure that SLP was successful. */
1902 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1904 struct _stmt_vec_info *stmt_info
1905 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1906 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1907 si->misalign, vect_prologue);
1910 /* Record the instance's instructions in the target cost model. */
1911 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1913 struct _stmt_vec_info *stmt_info
1914 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1915 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1916 si->misalign, vect_body);
1919 prologue_cost_vec.release ();
1920 body_cost_vec.release ();
1923 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1924 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1925 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1926 containing the remainder.
1927 Return the first stmt in the second group. */
1929 static gimple *
1930 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1932 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1933 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1934 gcc_assert (group1_size > 0);
1935 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1936 gcc_assert (group2_size > 0);
1937 GROUP_SIZE (first_vinfo) = group1_size;
1939 gimple *stmt = first_stmt;
1940 for (unsigned i = group1_size; i > 1; i--)
1942 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1943 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1945 /* STMT is now the last element of the first group. */
1946 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1947 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1949 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1950 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1952 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1953 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1956 /* For the second group, the GROUP_GAP is that before the original group,
1957 plus skipping over the first vector. */
1958 GROUP_GAP (vinfo_for_stmt (group2)) =
1959 GROUP_GAP (first_vinfo) + group1_size;
1961 /* GROUP_GAP of the first group now has to skip over the second group too. */
1962 GROUP_GAP (first_vinfo) += group2_size;
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1966 group1_size, group2_size);
1968 return group2;
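/* A hypothetical example of the bookkeeping above: splitting a group of
8 consecutive stores with GROUP1_SIZE == 4 yields two groups of 4.
The second group's GROUP_GAP becomes the original gap plus 4 (skipping
the first group), and the first group's GROUP_GAP grows by 4
(GROUP2_SIZE) so that its accesses skip over the second group. */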
1971 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
1974 static poly_uint64
1975 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1977 return exact_div (common_multiple (nunits, group_size), group_size);
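/* E.g., with illustrative values NUNITS == 4 and GROUP_SIZE == 6,
common_multiple (4, 6) == 12 and the unrolling factor is 12 / 6 == 2;
with GROUP_SIZE == 8 the common multiple is 8 and the factor is 1. */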
1980 /* Analyze an SLP instance starting from a group of grouped stores. Call
1981 vect_build_slp_tree to build a tree of packed stmts if possible.
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1984 static bool
1985 vect_analyze_slp_instance (vec_info *vinfo,
1986 gimple *stmt, unsigned max_tree_size)
1988 slp_instance new_instance;
1989 slp_tree node;
1990 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1991 tree vectype, scalar_type = NULL_TREE;
1992 gimple *next;
1993 unsigned int i;
1994 vec<slp_tree> loads;
1995 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1996 vec<gimple *> scalar_stmts;
1998 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2000 if (dr)
2002 scalar_type = TREE_TYPE (DR_REF (dr));
2003 vectype = get_vectype_for_scalar_type (scalar_type);
2005 else
2007 gcc_assert (is_a <loop_vec_info> (vinfo));
2008 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2011 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
2013 else
2015 gcc_assert (is_a <loop_vec_info> (vinfo));
2016 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2017 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2020 if (!vectype)
2022 if (dump_enabled_p ())
2024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2025 "Build SLP failed: unsupported data-type ");
2026 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
2027 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2030 return false;
2032 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2035 scalar_stmts.create (group_size);
2036 next = stmt;
2037 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2040 while (next)
2042 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2043 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2044 scalar_stmts.safe_push (
2045 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2046 else
2047 scalar_stmts.safe_push (next);
2048 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2050 /* Mark the first element of the reduction chain as reduction to properly
2051 transform the node. In the reduction analysis phase only the last
2052 element of the chain is marked as reduction. */
2053 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2054 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2056 else
2058 /* Collect reduction statements. */
2059 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2060 for (i = 0; reductions.iterate (i, &next); i++)
2061 scalar_stmts.safe_push (next);
2064 loads.create (group_size);
2066 /* Build the tree for the SLP instance. */
2067 bool *matches = XALLOCAVEC (bool, group_size);
2068 unsigned npermutes = 0;
2069 bst_fail = new scalar_stmts_set_t ();
2070 poly_uint64 max_nunits = nunits;
2071 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2072 &max_nunits, &loads, matches, &npermutes,
2073 NULL, max_tree_size);
2074 delete bst_fail;
2075 if (node != NULL)
2077 /* Calculate the unrolling factor based on the smallest type. */
2078 poly_uint64 unrolling_factor
2079 = calculate_unrolling_factor (max_nunits, group_size);
2081 if (maybe_ne (unrolling_factor, 1U)
2082 && is_a <bb_vec_info> (vinfo))
2084 unsigned HOST_WIDE_INT const_max_nunits;
2085 if (!max_nunits.is_constant (&const_max_nunits)
2086 || const_max_nunits > group_size)
2088 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2090 "Build SLP failed: store group "
2091 "size not a multiple of the vector size "
2092 "in basic block SLP\n");
2093 vect_free_slp_tree (node);
2094 loads.release ();
2095 return false;
2097 /* Fatal mismatch. */
2098 matches[group_size / const_max_nunits * const_max_nunits] = false;
2099 vect_free_slp_tree (node);
2100 loads.release ();
2102 else
2104 /* Create a new SLP instance. */
2105 new_instance = XNEW (struct _slp_instance);
2106 SLP_INSTANCE_TREE (new_instance) = node;
2107 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2108 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2109 SLP_INSTANCE_LOADS (new_instance) = loads;
2111 /* Compute the load permutation. */
2112 slp_tree load_node;
2113 bool loads_permuted = false;
2114 FOR_EACH_VEC_ELT (loads, i, load_node)
2116 vec<unsigned> load_permutation;
2117 int j;
2118 gimple *load, *first_stmt;
2119 bool this_load_permuted = false;
2120 load_permutation.create (group_size);
2121 first_stmt = GROUP_FIRST_ELEMENT
2122 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2123 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2125 int load_place = vect_get_place_in_interleaving_chain
2126 (load, first_stmt);
2127 gcc_assert (load_place != -1);
2128 if (load_place != j)
2129 this_load_permuted = true;
2130 load_permutation.safe_push (load_place);
2132 if (!this_load_permuted
2133 /* The load requires permutation when unrolling exposes
2134 a gap either because the group is larger than the SLP
2135 group-size or because there is a gap between the groups. */
2136 && (known_eq (unrolling_factor, 1U)
2137 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2138 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2140 load_permutation.release ();
2141 continue;
2143 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2144 loads_permuted = true;
2147 if (loads_permuted)
2149 if (!vect_supported_load_permutation_p (new_instance))
2151 if (dump_enabled_p ())
2153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2154 "Build SLP failed: unsupported load "
2155 "permutation ");
2156 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2157 TDF_SLIM, stmt, 0);
2159 vect_free_slp_instance (new_instance);
2160 return false;
2164 /* If the loads and stores can be handled with load/store-lanes
2165 instructions, do not generate this SLP instance. */
2166 if (is_a <loop_vec_info> (vinfo)
2167 && loads_permuted
2168 && dr && vect_store_lanes_supported (vectype, group_size))
2170 slp_tree load_node;
2171 FOR_EACH_VEC_ELT (loads, i, load_node)
2173 gimple *first_stmt = GROUP_FIRST_ELEMENT
2174 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2175 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2176 /* Use SLP for strided accesses (or if we
2177 can't load-lanes). */
2178 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2179 || ! vect_load_lanes_supported
2180 (STMT_VINFO_VECTYPE (stmt_vinfo),
2181 GROUP_SIZE (stmt_vinfo)))
2182 break;
2184 if (i == loads.length ())
2186 if (dump_enabled_p ())
2187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2188 "Built SLP cancelled: can use "
2189 "load/store-lanes\n");
2190 vect_free_slp_instance (new_instance);
2191 return false;
2195 vinfo->slp_instances.safe_push (new_instance);
2197 if (dump_enabled_p ())
2199 dump_printf_loc (MSG_NOTE, vect_location,
2200 "Final SLP tree for instance:\n");
2201 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2204 return true;
2207 else
2209 /* Failed to SLP. */
2210 /* Free the allocated memory. */
2211 scalar_stmts.release ();
2212 loads.release ();
2215 /* For basic block SLP, try to break the group up into multiples of the
2216 vector size. */
2217 unsigned HOST_WIDE_INT const_nunits;
2218 if (is_a <bb_vec_info> (vinfo)
2219 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2220 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2221 && nunits.is_constant (&const_nunits))
2223 /* We consider breaking the group only on VF boundaries from the existing
2224 start. */
2225 for (i = 0; i < group_size; i++)
2226 if (!matches[i]) break;
2228 if (i >= const_nunits && i < group_size)
2230 /* Split into two groups at the first vector boundary before i. */
2231 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2232 unsigned group1_size = i & ~(const_nunits - 1);
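/* E.g., with const_nunits == 4 (a power of two, as asserted above) and
a first mismatch at i == 6, group1_size == (6 & ~3) == 4. */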
2234 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2235 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2236 /* If the first non-match was in the middle of a vector,
2237 skip the rest of that vector. */
2238 if (group1_size < i)
2240 i = group1_size + const_nunits;
2241 if (i < group_size)
2242 rest = vect_split_slp_store_group (rest, const_nunits);
2244 if (i < group_size)
2245 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2246 return res;
2248 /* Even though the first vector did not all match, we might be able to SLP
2249 (some) of the remainder. FORNOW ignore this possibility. */
2252 return false;
2256 /* Check if there are stmts in the loop that can be vectorized using SLP.
2257 Build SLP trees of packed scalar stmts if SLP is possible. */
2259 bool
2260 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2262 unsigned int i;
2263 gimple *first_element;
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2268 /* Find SLP sequences starting from groups of grouped stores. */
2269 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2270 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2272 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2274 if (loop_vinfo->reduction_chains.length () > 0)
2276 /* Find SLP sequences starting from reduction chains. */
2277 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2278 if (! vect_analyze_slp_instance (vinfo, first_element,
2279 max_tree_size))
2281 /* Dissolve reduction chain group. */
2282 gimple *next, *stmt = first_element;
2283 while (stmt)
2285 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2286 next = GROUP_NEXT_ELEMENT (vinfo);
2287 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2288 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2289 stmt = next;
2291 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2292 = vect_internal_def;
2296 /* Find SLP sequences starting from groups of reductions. */
2297 if (loop_vinfo->reductions.length () > 1)
2298 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2299 max_tree_size);
2302 return true;
2306 /* For each possible SLP instance decide whether to SLP it and calculate the
2307 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2308 to SLP at least one instance. */
2310 bool
2311 vect_make_slp_decision (loop_vec_info loop_vinfo)
2313 unsigned int i;
2314 poly_uint64 unrolling_factor = 1;
2315 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2316 slp_instance instance;
2317 int decided_to_slp = 0;
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2321 "\n");
2323 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2325 /* FORNOW: SLP if you can. */
2326 /* All unroll factors have the form current_vector_size * X for some
2327 rational X, so they must have a common multiple. */
2328 unrolling_factor
2329 = force_common_multiple (unrolling_factor,
2330 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2332 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2333 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2334 loop-based vectorization. Such stmts will be marked as HYBRID. */
2335 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2336 decided_to_slp++;
2339 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2341 if (decided_to_slp && dump_enabled_p ())
2343 dump_printf_loc (MSG_NOTE, vect_location,
2344 "Decided to SLP %d instances. Unrolling factor ",
2345 decided_to_slp);
2346 dump_dec (MSG_NOTE, unrolling_factor);
2347 dump_printf (MSG_NOTE, "\n");
2350 return (decided_to_slp > 0);
2354 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2355 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2357 static void
2358 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2360 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2361 imm_use_iterator imm_iter;
2362 gimple *use_stmt;
2363 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2364 slp_tree child;
2365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2366 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2367 int j;
2369 /* Propagate hybrid down the SLP tree. */
2370 if (stype == hybrid)
2372 else if (HYBRID_SLP_STMT (stmt_vinfo))
2373 stype = hybrid;
2374 else
2376 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2377 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2378 /* If we get a pattern stmt here we have to use the LHS of the
2379 original stmt for immediate uses. */
2380 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2381 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2382 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2383 tree def;
2384 if (gimple_code (stmt) == GIMPLE_PHI)
2385 def = gimple_phi_result (stmt);
2386 else
2387 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2388 if (def)
2389 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2391 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2392 continue;
2393 use_vinfo = vinfo_for_stmt (use_stmt);
2394 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2395 && STMT_VINFO_RELATED_STMT (use_vinfo))
2396 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2397 if (!STMT_SLP_TYPE (use_vinfo)
2398 && (STMT_VINFO_RELEVANT (use_vinfo)
2399 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2400 && !(gimple_code (use_stmt) == GIMPLE_PHI
2401 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2403 if (dump_enabled_p ())
2405 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2406 "def in non-SLP stmt: ");
2407 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2409 stype = hybrid;
2414 if (stype == hybrid
2415 && !HYBRID_SLP_STMT (stmt_vinfo))
2417 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2420 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2422 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2425 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2426 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2427 vect_detect_hybrid_slp_stmts (child, i, stype);
2430 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2432 static tree
2433 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2435 walk_stmt_info *wi = (walk_stmt_info *)data;
2436 struct loop *loopp = (struct loop *)wi->info;
2438 if (wi->is_lhs)
2439 return NULL_TREE;
2441 if (TREE_CODE (*tp) == SSA_NAME
2442 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2444 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2445 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2446 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2448 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2451 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2453 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2457 return NULL_TREE;
2460 static tree
2461 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2462 walk_stmt_info *)
2464 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2465 /* If the stmt is in a SLP instance then this isn't a reason
2466 to mark use definitions in other SLP instances as hybrid. */
2467 if (! STMT_SLP_TYPE (use_vinfo)
2468 && (STMT_VINFO_RELEVANT (use_vinfo)
2469 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2470 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2471 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2473 else
2474 *handled = true;
2475 return NULL_TREE;
2478 /* Find stmts that must be both vectorized and SLPed. */
2480 void
2481 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2483 unsigned int i;
2484 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2485 slp_instance instance;
2487 if (dump_enabled_p ())
2488 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2489 "\n");
2491 /* First walk all pattern stmts in the loop and mark defs of uses as
2492 hybrid because immediate uses in them are not recorded. */
2493 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2495 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2496 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2497 gsi_next (&gsi))
2499 gimple *stmt = gsi_stmt (gsi);
2500 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2501 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2503 walk_stmt_info wi;
2504 memset (&wi, 0, sizeof (wi));
2505 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2506 gimple_stmt_iterator gsi2
2507 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2508 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2509 vect_detect_hybrid_slp_1, &wi);
2510 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2511 vect_detect_hybrid_slp_2,
2512 vect_detect_hybrid_slp_1, &wi);
2517 /* Then walk the SLP instance trees marking stmts with uses in
2518 non-SLP stmts as hybrid, also propagating hybrid down the
2519 SLP tree, collecting the above info on-the-fly. */
2520 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2522 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2523 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2524 i, pure_slp);
2529 /* Initialize a bb_vec_info struct for the statements between
2530 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2532 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2533 gimple_stmt_iterator region_end_in)
2534 : vec_info (vec_info::bb, init_cost (NULL)),
2535 bb (gsi_bb (region_begin_in)),
2536 region_begin (region_begin_in),
2537 region_end (region_end_in)
2539 gimple_stmt_iterator gsi;
2541 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2542 gsi_next (&gsi))
2544 gimple *stmt = gsi_stmt (gsi);
2545 gimple_set_uid (stmt, 0);
2546 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2549 bb->aux = this;
2553 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2554 stmts in the basic block. */
2556 _bb_vec_info::~_bb_vec_info ()
2558 for (gimple_stmt_iterator si = region_begin;
2559 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2561 gimple *stmt = gsi_stmt (si);
2562 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2564 if (stmt_info)
2565 /* Free stmt_vec_info. */
2566 free_stmt_vec_info (stmt);
2568 /* Reset region marker. */
2569 gimple_set_uid (stmt, -1);
2572 bb->aux = NULL;
2576 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2577 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2579 Return true if the operations are supported. */
2581 static bool
2582 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2583 slp_instance node_instance)
2585 bool dummy;
2586 int i, j;
2587 gimple *stmt;
2588 slp_tree child;
2590 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2591 return true;
2593 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2594 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2595 return false;
2597 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2598 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2599 gcc_assert (stmt_info);
2600 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2602 /* For BB vectorization vector types are assigned here.
2603 Memory accesses already got their vector type assigned
2604 in vect_analyze_data_refs. */
2605 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2606 if (bb_vinfo
2607 && ! STMT_VINFO_DATA_REF (stmt_info))
2609 gcc_assert (PURE_SLP_STMT (stmt_info));
2611 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2612 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_NOTE, vect_location,
2615 "get vectype for scalar type: ");
2616 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2617 dump_printf (MSG_NOTE, "\n");
2620 tree vectype = get_vectype_for_scalar_type (scalar_type);
2621 if (!vectype)
2623 if (dump_enabled_p ())
2625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2626 "not SLPed: unsupported data-type ");
2627 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2628 scalar_type);
2629 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2631 return false;
2634 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2637 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2638 dump_printf (MSG_NOTE, "\n");
2641 gimple *sstmt;
2642 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2643 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2646 /* Calculate the number of vector statements to be created for the
2647 scalar stmts in this node. For SLP reductions it is equal to the
2648 number of vector statements in the children (which has already been
2649 calculated by the recursive call). Otherwise it is the number of
2650 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2651 VF divided by the number of elements in a vector. */
2652 if (GROUP_FIRST_ELEMENT (stmt_info)
2653 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2654 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2655 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2656 else
2658 poly_uint64 vf;
2659 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2660 vf = loop_vinfo->vectorization_factor;
2661 else
2662 vf = 1;
2663 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2664 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2665 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2666 = vect_get_num_vectors (vf * group_size, vectype);
2669 /* Push SLP node def-type to stmt operands. */
2670 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2671 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2672 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2673 = SLP_TREE_DEF_TYPE (child);
2674 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2675 /* Restore def-types. */
2676 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2677 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2678 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2679 = vect_internal_def;
2680 if (! res)
2681 return false;
2683 return true;
2687 /* Analyze statements in SLP instances of VINFO. Return true if the
2688 operations are supported. */
2690 bool
2691 vect_slp_analyze_operations (vec_info *vinfo)
2693 slp_instance instance;
2694 int i;
2696 if (dump_enabled_p ())
2697 dump_printf_loc (MSG_NOTE, vect_location,
2698 "=== vect_slp_analyze_operations ===\n");
2700 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2702 if (!vect_slp_analyze_node_operations (vinfo,
2703 SLP_INSTANCE_TREE (instance),
2704 instance))
2706 dump_printf_loc (MSG_NOTE, vect_location,
2707 "removing SLP instance operations starting from: ");
2708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2709 SLP_TREE_SCALAR_STMTS
2710 (SLP_INSTANCE_TREE (instance))[0], 0);
2711 vect_free_slp_instance (instance);
2712 vinfo->slp_instances.ordered_remove (i);
2714 else
2716 /* Compute the costs of the SLP instance. */
2717 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2718 i++;
2722 return !vinfo->slp_instances.is_empty ();
2726 /* Compute the scalar cost of the SLP node NODE and its children
2727 and return it. Do not account defs that are marked in LIFE and
2728 update LIFE according to uses of NODE. */
2730 static unsigned
2731 vect_bb_slp_scalar_cost (basic_block bb,
2732 slp_tree node, vec<bool, va_heap> *life)
2734 unsigned scalar_cost = 0;
2735 unsigned i;
2736 gimple *stmt;
2737 slp_tree child;
2739 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2741 unsigned stmt_cost;
2742 ssa_op_iter op_iter;
2743 def_operand_p def_p;
2744 stmt_vec_info stmt_info;
2746 if ((*life)[i])
2747 continue;
2749 /* If there is a non-vectorized use of the defs then the scalar
2750 stmt is kept live in which case we do not account it or any
2751 required defs in the SLP children in the scalar cost. This
2752 way we make the vectorization more costly when compared to
2753 the scalar cost. */
2754 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2756 imm_use_iterator use_iter;
2757 gimple *use_stmt;
2758 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2759 if (!is_gimple_debug (use_stmt)
2760 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2761 use_stmt)
2762 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2764 (*life)[i] = true;
2765 BREAK_FROM_IMM_USE_STMT (use_iter);
2768 if ((*life)[i])
2769 continue;
2771 /* Count scalar stmts only once. */
2772 if (gimple_visited_p (stmt))
2773 continue;
2774 gimple_set_visited (stmt, true);
2776 stmt_info = vinfo_for_stmt (stmt);
2777 if (STMT_VINFO_DATA_REF (stmt_info))
2779 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2780 stmt_cost = vect_get_stmt_cost (scalar_load);
2781 else
2782 stmt_cost = vect_get_stmt_cost (scalar_store);
2784 else
2785 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2787 scalar_cost += stmt_cost;
2790 auto_vec<bool, 20> subtree_life;
2791 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2793 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2795 /* Do not directly pass LIFE to the recursive call, copy it to
2796 confine changes in the callee to the current child/subtree. */
2797 subtree_life.safe_splice (*life);
2798 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2799 subtree_life.truncate (0);
2803 return scalar_cost;
2806 /* Check if vectorization of the basic block is profitable. */
2808 static bool
2809 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2811 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2812 slp_instance instance;
2813 int i;
2814 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2815 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2817 /* Calculate scalar cost. */
2818 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2820 auto_vec<bool, 20> life;
2821 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2822 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2823 SLP_INSTANCE_TREE (instance),
2824 &life);
2827 /* Unset visited flag. */
2828 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2829 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2830 gimple_set_visited (gsi_stmt (gsi), false);
2832 /* Complete the target-specific cost calculation. */
2833 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2834 &vec_inside_cost, &vec_epilogue_cost);
2836 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2838 if (dump_enabled_p ())
2840 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2841 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2842 vec_inside_cost);
2843 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2844 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2845 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2848 /* Vectorization is profitable if its cost is less than the cost of the
2849 scalar version. Note that we err on the vector side for equal cost because
2850 the cost estimate is otherwise quite pessimistic (constant uses are
2851 free on the scalar side but cost a load on the vector side for
2852 example). */
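/* A numeric sketch with made-up costs: vec_inside_cost == 4,
vec_prologue_cost == 2 and vec_epilogue_cost == 0 give a total vector
cost of 6, which beats a scalar_cost of 8; a scalar_cost of exactly 6
would also be vectorized, per the equal-cost rule above. */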
2853 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2854 return false;
2856 return true;
2859 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2860 if so and sets fatal to true if failure is independent of
2861 current_vector_size. */
2863 static bb_vec_info
2864 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2865 gimple_stmt_iterator region_end,
2866 vec<data_reference_p> datarefs, int n_stmts,
2867 bool &fatal)
2869 bb_vec_info bb_vinfo;
2870 slp_instance instance;
2871 int i;
2872 poly_uint64 min_vf = 2;
2874 /* The first group of checks is independent of the vector size. */
2875 fatal = true;
2877 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2879 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2881 "not vectorized: too many instructions in "
2882 "basic block.\n");
2883 free_data_refs (datarefs);
2884 return NULL;
2887 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2888 if (!bb_vinfo)
2889 return NULL;
2891 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2893 /* Analyze the data references. */
2895 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2897 if (dump_enabled_p ())
2898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2899 "not vectorized: unhandled data-ref in basic "
2900 "block.\n");
2902 delete bb_vinfo;
2903 return NULL;
2906 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2908 if (dump_enabled_p ())
2909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2910 "not vectorized: not enough data-refs in "
2911 "basic block.\n");
2913 delete bb_vinfo;
2914 return NULL;
2917 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2919 if (dump_enabled_p ())
2920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2921 "not vectorized: unhandled data access in "
2922 "basic block.\n");
2924 delete bb_vinfo;
2925 return NULL;
2928 /* If there are no grouped stores in the region there is no need
2929 to continue with pattern recog as vect_analyze_slp will fail
2930 anyway. */
2931 if (bb_vinfo->grouped_stores.is_empty ())
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2935 "not vectorized: no grouped stores in "
2936 "basic block.\n");
2938 delete bb_vinfo;
2939 return NULL;
2942 /* The rest of the analysis below depends on the vector size in some way. */
2943 fatal = false;
2945 vect_pattern_recog (bb_vinfo);
2947 /* Check the SLP opportunities in the basic block, analyze and build SLP
2948 trees. */
2949 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2951 if (dump_enabled_p ())
2953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2954 "Failed to SLP the basic block.\n");
2955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2956 "not vectorized: failed to find SLP opportunities "
2957 "in basic block.\n");
2960 delete bb_vinfo;
2961 return NULL;
2964 vect_record_base_alignments (bb_vinfo);
2966 /* Analyze and verify the alignment of data references and the
2967 dependence in the SLP instances. */
2968 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2970 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2971 || ! vect_slp_analyze_instance_dependence (instance))
2973 dump_printf_loc (MSG_NOTE, vect_location,
2974 "removing SLP instance operations starting from: ");
2975 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2976 SLP_TREE_SCALAR_STMTS
2977 (SLP_INSTANCE_TREE (instance))[0], 0);
2978 vect_free_slp_instance (instance);
2979 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2980 continue;
2983 /* Mark all the statements that we want to vectorize as pure SLP and
2984 relevant. */
2985 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2986 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2988 i++;
2990 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2992 delete bb_vinfo;
2993 return NULL;
2996 if (!vect_slp_analyze_operations (bb_vinfo))
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3000 "not vectorized: bad operation in basic block.\n");
3002 delete bb_vinfo;
3003 return NULL;
3006 /* Cost model: check if the vectorization is worthwhile. */
3007 if (!unlimited_cost_model (NULL)
3008 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3010 if (dump_enabled_p ())
3011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3012 "not vectorized: vectorization is not "
3013 "profitable.\n");
3015 delete bb_vinfo;
3016 return NULL;
3019 if (dump_enabled_p ())
3020 dump_printf_loc (MSG_NOTE, vect_location,
3021 "Basic block will be vectorized using SLP\n");
3023 return bb_vinfo;
3027 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3028 true if anything in the basic-block was vectorized. */
3030 bool
3031 vect_slp_bb (basic_block bb)
3033 bb_vec_info bb_vinfo;
3034 gimple_stmt_iterator gsi;
3035 bool any_vectorized = false;
3036 auto_vector_sizes vector_sizes;
3038 if (dump_enabled_p ())
3039 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
3041 /* Autodetect the first vector size we try. */
3042 current_vector_size = 0;
3043 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3044 unsigned int next_size = 0;
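/* The loop below walks the region and retries the analysis with each
vector size the target advertises. E.g., on a hypothetical target
advertising sizes {32, 16, 8}, a region that fails analysis with the
autodetected size is re-analyzed at the remaining sizes before we
give up on it. */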
3046 gsi = gsi_start_bb (bb);
3048 poly_uint64 autodetected_vector_size = 0;
3049 while (1)
3051 if (gsi_end_p (gsi))
3052 break;
3054 gimple_stmt_iterator region_begin = gsi;
3055 vec<data_reference_p> datarefs = vNULL;
3056 int insns = 0;
3058 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3060 gimple *stmt = gsi_stmt (gsi);
3061 if (is_gimple_debug (stmt))
3062 continue;
3063 insns++;
3065 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3066 vect_location = gimple_location (stmt);
3068 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3069 break;
3072 /* Skip leading unhandled stmts. */
3073 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3075 gsi_next (&gsi);
3076 continue;
3079 gimple_stmt_iterator region_end = gsi;
3081 bool vectorized = false;
3082 bool fatal = false;
3083 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3084 datarefs, insns, fatal);
3085 if (bb_vinfo
3086 && dbg_cnt (vect_slp))
3088 if (dump_enabled_p ())
3089 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3091 vect_schedule_slp (bb_vinfo);
3093 if (dump_enabled_p ())
3094 dump_printf_loc (MSG_NOTE, vect_location,
3095 "basic block part vectorized\n");
3097 vectorized = true;
3099 delete bb_vinfo;
3101 any_vectorized |= vectorized;
3103 if (next_size == 0)
3104 autodetected_vector_size = current_vector_size;
3106 if (next_size < vector_sizes.length ()
3107 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3108 next_size += 1;
3110 if (vectorized
3111 || next_size == vector_sizes.length ()
3112 || known_eq (current_vector_size, 0U)
3113 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3114 vector sizes will fail do not bother iterating. */
3115 || fatal)
3117 if (gsi_end_p (region_end))
3118 break;
3120 /* Skip the unhandled stmt. */
3121 gsi_next (&gsi);
3123 /* And reset vector sizes. */
3124 current_vector_size = 0;
3125 next_size = 0;
3127 else
3129 /* Try the next biggest vector size. */
3130 current_vector_size = vector_sizes[next_size++];
3131 if (dump_enabled_p ())
3133 dump_printf_loc (MSG_NOTE, vect_location,
3134 "***** Re-trying analysis with "
3135 "vector size ");
3136 dump_dec (MSG_NOTE, current_vector_size);
3137 dump_printf (MSG_NOTE, "\n");
3140 /* Start over. */
3141 gsi = region_begin;
3145 return any_vectorized;
3149 /* Return true if the vector type of the boolean constant that is the
3150 OPNUM operand in statement STMT is a boolean vector. */
3152 static bool
3153 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3155 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3156 enum tree_code code = gimple_expr_code (stmt);
3157 tree op, vectype;
3158 gimple *def_stmt;
3159 enum vect_def_type dt;
3161 /* For comparisons and COND_EXPR the type is chosen depending
3162 on the other comparison operand. */
3163 if (TREE_CODE_CLASS (code) == tcc_comparison)
3165 if (opnum)
3166 op = gimple_assign_rhs1 (stmt);
3167 else
3168 op = gimple_assign_rhs2 (stmt);
3170 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3171 &dt, &vectype))
3172 gcc_unreachable ();
3174 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3177 if (code == COND_EXPR)
3179 tree cond = gimple_assign_rhs1 (stmt);
3181 if (TREE_CODE (cond) == SSA_NAME)
3182 op = cond;
3183 else if (opnum)
3184 op = TREE_OPERAND (cond, 1);
3185 else
3186 op = TREE_OPERAND (cond, 0);
3188 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3189 &dt, &vectype))
3190 gcc_unreachable ();
3192 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3195 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3199 /* For constant and loop invariant defs of SLP_NODE this function returns
3200 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3201 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3202 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3206 static void
3207 vect_get_constant_vectors (tree op, slp_tree slp_node,
3208 vec<tree> *vec_oprnds,
3209 unsigned int op_num, unsigned int number_of_vectors)
3211 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3212 gimple *stmt = stmts[0];
3213 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3214 unsigned nunits;
3215 tree vec_cst;
3216 unsigned j, number_of_places_left_in_vector;
3217 tree vector_type;
3218 tree vop;
3219 int group_size = stmts.length ();
3220 unsigned int vec_num, i;
3221 unsigned number_of_copies = 1;
3222 vec<tree> voprnds;
3223 voprnds.create (number_of_vectors);
3224 bool constant_p, is_store;
3225 tree neutral_op = NULL;
3226 enum tree_code code = gimple_expr_code (stmt);
3227 gimple_seq ctor_seq = NULL;
3229 /* Check if vector type is a boolean vector. */
3230 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3231 && vect_mask_constant_operand_p (stmt, op_num))
3232 vector_type
3233 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3234 else
3235 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3236 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3238 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3240 is_store = true;
3241 op = gimple_assign_rhs1 (stmt);
3243 else
3244 is_store = false;
3246 gcc_assert (op);
3248 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3249 created vectors. It is greater than 1 if unrolling is performed.
3251 For example, we have two scalar operands, s1 and s2 (e.g., group of
3252 strided accesses of size two), while NUNITS is four (i.e., four scalars
3253 of this type can be packed in a vector). The output vector will contain
3254 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3255 will be 2).
3257 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3258 containing the operands.
3260 For example, NUNITS is four as before, and the group size is 8
3261 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3262 {s5, s6, s7, s8}. */
3264 number_of_copies = nunits * number_of_vectors / group_size;
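/* Tying this to the examples above, with illustrative values: for
nunits == 4, number_of_vectors == 1 and group_size == 2 this gives
4 * 1 / 2 == 2 copies ({s1, s2, s1, s2}); for group_size == 8 and
number_of_vectors == 2 it gives 4 * 2 / 8 == 1 copy per scalar. */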
3266 number_of_places_left_in_vector = nunits;
3267 constant_p = true;
3268 tree_vector_builder elts (vector_type, nunits, 1);
3269 elts.quick_grow (nunits);
3270 bool place_after_defs = false;
3271 for (j = 0; j < number_of_copies; j++)
3273 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3275 if (is_store)
3276 op = gimple_assign_rhs1 (stmt);
3277 else
3279 switch (code)
3281 case COND_EXPR:
3283 tree cond = gimple_assign_rhs1 (stmt);
3284 if (TREE_CODE (cond) == SSA_NAME)
3285 op = gimple_op (stmt, op_num + 1);
3286 else if (op_num == 0 || op_num == 1)
3287 op = TREE_OPERAND (cond, op_num);
3288 else
3290 if (op_num == 2)
3291 op = gimple_assign_rhs2 (stmt);
3292 else
3293 op = gimple_assign_rhs3 (stmt);
3296 break;
3298 case CALL_EXPR:
3299 op = gimple_call_arg (stmt, op_num);
3300 break;
3302 case LSHIFT_EXPR:
3303 case RSHIFT_EXPR:
3304 case LROTATE_EXPR:
3305 case RROTATE_EXPR:
3306 op = gimple_op (stmt, op_num + 1);
3307 /* Unlike the other binary operators, shifts/rotates have
3308 the shift count being int, instead of the same type as
3309 the lhs, so make sure the scalar is the right type if
3310 we are dealing with vectors of
3311 long long/long/short/char. */
3312 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3313 op = fold_convert (TREE_TYPE (vector_type), op);
3314 break;
3316 default:
3317 op = gimple_op (stmt, op_num + 1);
3318 break;
3322 /* Create 'vect_ = {op0,op1,...,opn}'. */
3323 number_of_places_left_in_vector--;
3324 tree orig_op = op;
3325 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3327 if (CONSTANT_CLASS_P (op))
3329 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3331 /* Can't use VIEW_CONVERT_EXPR for booleans because
3332 of possibly different sizes of scalar value and
3333 vector element. */
3334 if (integer_zerop (op))
3335 op = build_int_cst (TREE_TYPE (vector_type), 0);
3336 else if (integer_onep (op))
3337 op = build_all_ones_cst (TREE_TYPE (vector_type));
3338 else
3339 gcc_unreachable ();
3341 else
3342 op = fold_unary (VIEW_CONVERT_EXPR,
3343 TREE_TYPE (vector_type), op);
3344 gcc_assert (op && CONSTANT_CLASS_P (op));
3346 else
3348 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3349 gimple *init_stmt;
3350 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3352 tree true_val
3353 = build_all_ones_cst (TREE_TYPE (vector_type));
3354 tree false_val
3355 = build_zero_cst (TREE_TYPE (vector_type));
3356 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3357 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3358 op, true_val,
3359 false_val);
3361 else
3363 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3364 op);
3365 init_stmt
3366 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3367 op);
3369 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3370 op = new_temp;
3373 elts[number_of_places_left_in_vector] = op;
3374 if (!CONSTANT_CLASS_P (op))
3375 constant_p = false;
3376 if (TREE_CODE (orig_op) == SSA_NAME
3377 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3378 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3379 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3380 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3381 place_after_defs = true;
3383 if (number_of_places_left_in_vector == 0)
3385 if (constant_p)
3386 vec_cst = elts.build ();
3387 else
3389 vec<constructor_elt, va_gc> *v;
3390 unsigned k;
3391 vec_alloc (v, nunits);
3392 for (k = 0; k < nunits; ++k)
3393 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3394 vec_cst = build_constructor (vector_type, v);
3396 tree init;
3397 gimple_stmt_iterator gsi;
3398 if (place_after_defs)
3400 gsi = gsi_for_stmt
3401 (vect_find_last_scalar_stmt_in_slp (slp_node));
3402 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3404 else
3405 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3406 if (ctor_seq != NULL)
3408 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3409 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3410 GSI_SAME_STMT);
3411 ctor_seq = NULL;
3413 voprnds.quick_push (init);
3414 place_after_defs = false;
3415 number_of_places_left_in_vector = nunits;
3416 constant_p = true;
3417 elts.new_vector (vector_type, nunits, 1);
3418 elts.quick_grow (nunits);
3423 /* Since the vectors are created in reverse order, we reverse them here. */
3425 vec_num = voprnds.length ();
3426 for (j = vec_num; j != 0; j--)
3428 vop = voprnds[j - 1];
3429 vec_oprnds->quick_push (vop);
3432 voprnds.release ();
3434 /* If VF is greater than the unrolling factor needed for the SLP group
3435 of stmts, NUMBER_OF_VECTORS is greater than the number of distinct
3436 vectors that can be built from the scalars, and hence we have to
3437 replicate the vectors. */
3438 while (number_of_vectors > vec_oprnds->length ())
3440 tree neutral_vec = NULL;
3442 if (neutral_op)
3444 if (!neutral_vec)
3445 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3447 vec_oprnds->quick_push (neutral_vec);
3449 else
3451 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3452 vec_oprnds->quick_push (vop);
3458 /* Get vectorized definitions from SLP_NODE that contains corresponding
3459 vectorized def-stmts. */
3461 static void
3462 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3464 tree vec_oprnd;
3465 gimple *vec_def_stmt;
3466 unsigned int i;
3468 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3470 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3472 gcc_assert (vec_def_stmt);
3473 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3474 vec_oprnd = gimple_phi_result (vec_def_stmt);
3475 else
3476 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3477 vec_oprnds->quick_push (vec_oprnd);
3482 /* Get vectorized definitions for SLP_NODE.
3483 If the scalar definitions are loop invariants or constants, collect them and
3484 call vect_get_constant_vectors() to create vector stmts.
3485 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3486 must be stored in the corresponding child of SLP_NODE, and we call
3487 vect_get_slp_vect_defs () to retrieve them. */
3489 void
3490 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3491 vec<vec<tree> > *vec_oprnds)
3493 gimple *first_stmt;
3494 int number_of_vects = 0, i;
3495 unsigned int child_index = 0;
3496 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3497 slp_tree child = NULL;
3498 vec<tree> vec_defs;
3499 tree oprnd;
3500 bool vectorized_defs;
3502 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3503 FOR_EACH_VEC_ELT (ops, i, oprnd)
3505 /* For each operand we check if it has vectorized definitions in a child
3506 node or we need to create them (for invariants and constants). We
3507 check if the LHS of the first stmt of the next child matches OPRND.
3508 If it does, we found the correct child. Otherwise, we call
3509 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3510 to check this child node for the next operand. */
3511 vectorized_defs = false;
3512 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3514 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3516 /* We have to check both pattern and original def, if available. */
3517 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3519 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3520 gimple *related
3521 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3522 tree first_def_op;
3524 if (gimple_code (first_def) == GIMPLE_PHI)
3525 first_def_op = gimple_phi_result (first_def);
3526 else
3527 first_def_op = gimple_get_lhs (first_def);
3528 if (operand_equal_p (oprnd, first_def_op, 0)
3529 || (related
3530 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3532 /* The number of vector defs is determined by the number of
3533 vector statements in the node from which we get those
3534 statements. */
3535 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3536 vectorized_defs = true;
3537 child_index++;
3540 else
3541 child_index++;
3544 if (!vectorized_defs)
3546 if (i == 0)
3548 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3549 /* Number of vector stmts was calculated according to LHS in
3550 vect_schedule_slp_instance (), fix it by replacing LHS with
3551 RHS, if necessary. See vect_get_smallest_scalar_type () for
3552 details. */
3553 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3554 &rhs_size_unit);
3555 if (rhs_size_unit != lhs_size_unit)
3557 number_of_vects *= rhs_size_unit;
3558 number_of_vects /= lhs_size_unit;
3563 /* Allocate memory for vectorized defs. */
3564 vec_defs = vNULL;
3565 vec_defs.create (number_of_vects);
3567 /* For reduction defs we call vect_get_constant_vectors (), since we are
3568 looking for initial loop invariant values. */
3569 if (vectorized_defs)
3570 /* The defs are already vectorized. */
3571 vect_get_slp_vect_defs (child, &vec_defs);
3572 else
3573 /* Build vectors from scalar defs. */
3574 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3575 number_of_vects);
3577 vec_oprnds->quick_push (vec_defs);
3581 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3582 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3583 permute statements for the SLP node NODE of the SLP instance
3584 SLP_NODE_INSTANCE. */
3586 bool
3587 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3588 gimple_stmt_iterator *gsi, poly_uint64 vf,
3589 slp_instance slp_node_instance, bool analyze_only,
3590 unsigned *n_perms)
3592 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3593 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3594 tree mask_element_type = NULL_TREE, mask_type;
3595 int nunits, vec_index = 0;
3596 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3597 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3598 int mask_element;
3599 machine_mode mode;
3600 unsigned HOST_WIDE_INT const_vf;
3602 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3603 return false;
3605 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3607 mode = TYPE_MODE (vectype);
3609 /* At the moment, all permutations are represented using per-element
3610 indices, so we can't cope with variable vectorization factors. */
3611 if (!vf.is_constant (&const_vf))
3612 return false;
3614 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3615 same size as the vector element being permuted. */
3616 mask_element_type = lang_hooks.types.type_for_mode
3617 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3618 mask_type = get_vectype_for_scalar_type (mask_element_type);
3619 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3620 vec_perm_builder mask (nunits, nunits, 1);
3621 mask.quick_grow (nunits);
3622 vec_perm_indices indices;
3624 /* Initialize the vect stmts of NODE to properly insert the generated
3625 stmts later. */
3626 if (! analyze_only)
3627 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3628 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3629 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3631 /* Generate permutation masks for every NODE. Number of masks for each NODE
3632 is equal to GROUP_SIZE.
3633 E.g., we have a group of three nodes with three loads from the same
3634 location in each node, and the vector size is 4. I.e., we have an
3635 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3636 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3637 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3640 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3641 The last mask is illegal since we assume two operands for a permute
3642 operation, and the mask element values can't be outside that range.
3643 Hence, the last mask must be converted into {2,5,5,5}.
3644 For the first two permutations we need the first and the second input
3645 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3646 we need the second and the third vectors: {b1,c1,a2,b2} and
3647 {c2,a3,b3,c3}. */
3649 int vect_stmts_counter = 0;
3650 int index = 0;
3651 int first_vec_index = -1;
3652 int second_vec_index = -1;
3653 bool noop_p = true;
3654 *n_perms = 0;
3656 for (unsigned int j = 0; j < const_vf; j++)
3658 for (int k = 0; k < group_size; k++)
3660 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3661 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3662 vec_index = i / nunits;
3663 mask_element = i % nunits;
3664 if (vec_index == first_vec_index
3665 || first_vec_index == -1)
3667 first_vec_index = vec_index;
3669 else if (vec_index == second_vec_index
3670 || second_vec_index == -1)
3672 second_vec_index = vec_index;
3673 mask_element += nunits;
3675 else
3677 if (dump_enabled_p ())
3679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3680 "permutation requires at "
3681 "least three vectors ");
3682 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3683 stmt, 0);
3685 gcc_assert (analyze_only);
3686 return false;
3689 gcc_assert (mask_element >= 0
3690 && mask_element < 2 * nunits);
3691 if (mask_element != index)
3692 noop_p = false;
3693 mask[index++] = mask_element;
3695 if (index == nunits && !noop_p)
3697 indices.new_vector (mask, 2, nunits);
3698 if (!can_vec_perm_const_p (mode, indices))
3700 if (dump_enabled_p ())
3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3703 vect_location,
3704 "unsupported vect permute { ");
3705 for (i = 0; i < nunits; ++i)
3706 dump_printf (MSG_MISSED_OPTIMIZATION,
3707 HOST_WIDE_INT_PRINT_DEC " ", mask[i]);
3708 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3710 gcc_assert (analyze_only);
3711 return false;
3714 ++*n_perms;
3717 if (index == nunits)
3719 if (!analyze_only)
3721 tree mask_vec = NULL_TREE;
3723 if (! noop_p)
3724 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
3726 if (second_vec_index == -1)
3727 second_vec_index = first_vec_index;
3729 /* Generate the permute statement if necessary. */
3730 tree first_vec = dr_chain[first_vec_index];
3731 tree second_vec = dr_chain[second_vec_index];
3732 gimple *perm_stmt;
3733 if (! noop_p)
3735 tree perm_dest
3736 = vect_create_destination_var (gimple_assign_lhs (stmt),
3737 vectype);
3738 perm_dest = make_ssa_name (perm_dest);
3739 perm_stmt = gimple_build_assign (perm_dest,
3740 VEC_PERM_EXPR,
3741 first_vec, second_vec,
3742 mask_vec);
3743 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3745 else
3746 /* If mask was NULL_TREE generate the requested
3747 identity transform. */
3748 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3750 /* Store the vector statement in NODE. */
3751 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3754 index = 0;
3755 first_vec_index = -1;
3756 second_vec_index = -1;
3757 noop_p = true;
3762 return true;
3765 typedef hash_map <vec <gimple *>, slp_tree,
3766 simple_hashmap_traits <bst_traits, slp_tree> >
3767 scalar_stmts_to_slp_tree_map_t;
3769 /* Vectorize SLP instance tree in postorder. */
3771 static bool
3772 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3773 scalar_stmts_to_slp_tree_map_t *bst_map)
3774 {
3775 gimple *stmt;
3776 bool grouped_store, is_store;
3777 gimple_stmt_iterator si;
3778 stmt_vec_info stmt_info;
3779 unsigned int group_size;
3780 tree vectype;
3781 int i, j;
3782 slp_tree child;
3784 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3785 return false;
3787 /* See if we have already vectorized the same set of stmts and, if so,
3788 reuse their vectorized stmts. */
3789 slp_tree &leader
3790 = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
3791 if (leader)
3792 {
3793 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
3794 return false;
3795 }
3797 leader = node;
3798 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3799 vect_schedule_slp_instance (child, instance, bst_map);
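/* The recursion above guarantees postorder: all children are scheduled
   before this node, so the vectorized defs of every operand exist by
   the time this node's own stmts are generated.  */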
3801 /* Push SLP node def-type to stmts. */
3802 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3803 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3804 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3805 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3807 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3808 stmt_info = vinfo_for_stmt (stmt);
3810 /* VECTYPE is the type of the destination. */
3811 vectype = STMT_VINFO_VECTYPE (stmt_info);
3812 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3814 if (!SLP_TREE_VEC_STMTS (node).exists ())
3815 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3817 if (dump_enabled_p ())
3818 {
3819 dump_printf_loc (MSG_NOTE, vect_location,
3820 "------>vectorizing SLP node starting from: ");
3821 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3822 }
3824 /* Vectorized stmts go before the last scalar stmt, which is where
3825 all uses are ready. */
3826 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3828 /* Mark the first element of the reduction chain as reduction to properly
3829 transform the node. In the analysis phase only the last element of the
3830 chain is marked as reduction. */
3831 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3832 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3833 {
3834 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3835 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3836 }
3838 /* Handle two-operation SLP nodes by vectorizing the group with
3839 both operations and then performing a merge. */
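/* E.g. (an illustrative group, not from the source): for scalar stmts
   {a0 + b0, a1 - b1, a2 + b2, a3 - b3}, code0 is PLUS_EXPR, ocode
   becomes MINUS_EXPR and mask becomes {0,1,0,1}; the group is
   vectorized once as all-plus (v0) and once as all-minus (v1), and a
   VEC_PERM_EXPR below blends the two.  */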
3840 if (SLP_TREE_TWO_OPERATORS (node))
3841 {
3842 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3843 enum tree_code ocode = ERROR_MARK;
3844 gimple *ostmt;
3845 vec_perm_builder mask (group_size, group_size, 1);
3846 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3847 if (gimple_assign_rhs_code (ostmt) != code0)
3848 {
3849 mask.quick_push (1);
3850 ocode = gimple_assign_rhs_code (ostmt);
3851 }
3852 else
3853 mask.quick_push (0);
3854 if (ocode != ERROR_MARK)
3855 {
3856 vec<gimple *> v0;
3857 vec<gimple *> v1;
3858 unsigned j;
3859 tree tmask = NULL_TREE;
3860 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3861 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3862 SLP_TREE_VEC_STMTS (node).truncate (0);
3863 gimple_assign_set_rhs_code (stmt, ocode);
3864 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3865 gimple_assign_set_rhs_code (stmt, code0);
3866 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3867 SLP_TREE_VEC_STMTS (node).truncate (0);
3868 tree meltype = build_nonstandard_integer_type
3869 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3870 tree mvectype = get_same_sized_vectype (meltype, vectype);
3871 unsigned k = 0, l;
3872 for (j = 0; j < v0.length (); ++j)
3873 {
3874 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3875 tree_vector_builder melts (mvectype, nunits, 1);
3876 for (l = 0; l < nunits; ++l)
3877 {
3878 if (k >= group_size)
3879 k = 0;
3880 tree t = build_int_cst (meltype, mask[k++] * nunits + l);
3881 melts.quick_push (t);
3882 }
3883 tmask = melts.build ();
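/* Continuing the illustrative {0,1,0,1} mask with nunits == 4, the
   selector built above is {0*4+0, 1*4+1, 0*4+2, 1*4+3} = {0,5,2,7}:
   lanes 0 and 2 are taken from v0[j], lanes 1 and 3 from v1[j], since
   VEC_PERM_EXPR indices of nunits or more select from the second
   operand.  */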
3885 /* ??? Not all targets support a VEC_PERM_EXPR with a
3886 constant mask that would translate to a vec_merge RTX
3887 (with their vec_perm_const_ok). We can either not
3888 vectorize in that case or let veclower do its job.
3889 Unfortunately that isn't too great and at least for
3890 plus/minus we'd eventually like to match targets
3891 vector addsub instructions. */
3892 gimple *vstmt;
3893 vstmt = gimple_build_assign (make_ssa_name (vectype),
3894 VEC_PERM_EXPR,
3895 gimple_assign_lhs (v0[j]),
3896 gimple_assign_lhs (v1[j]), tmask);
3897 vect_finish_stmt_generation (stmt, vstmt, &si);
3898 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3899 }
3900 v0.release ();
3901 v1.release ();
3902 return false;
3903 }
3904 }
3905 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3907 /* Restore stmt def-types. */
3908 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3909 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3910 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3911 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3913 return is_store;
3914 }
3916 /* Replace each scalar call in SLP node NODE with an assignment of zero to its lhs.
3917 For loop vectorization this is done in vectorizable_call, but for SLP
3918 it needs to be deferred until end of vect_schedule_slp, because multiple
3919 SLP instances may refer to the same scalar stmt. */
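/* E.g. a no-longer-needed scalar call x_1 = pow (a_2, a_3) becomes
   x_1 = 0.0, keeping the SSA definition of x_1 valid while making the
   stmt trivially dead.  */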
3921 static void
3922 vect_remove_slp_scalar_calls (slp_tree node)
3923 {
3924 gimple *stmt, *new_stmt;
3925 gimple_stmt_iterator gsi;
3926 int i;
3927 slp_tree child;
3928 tree lhs;
3929 stmt_vec_info stmt_info;
3931 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3932 return;
3934 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3935 vect_remove_slp_scalar_calls (child);
3937 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3938 {
3939 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3940 continue;
3941 stmt_info = vinfo_for_stmt (stmt);
3942 if (stmt_info == NULL
3943 || is_pattern_stmt_p (stmt_info)
3944 || !PURE_SLP_STMT (stmt_info))
3945 continue;
3946 lhs = gimple_call_lhs (stmt);
3947 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3948 set_vinfo_for_stmt (new_stmt, stmt_info);
3949 set_vinfo_for_stmt (stmt, NULL);
3950 STMT_VINFO_STMT (stmt_info) = new_stmt;
3951 gsi = gsi_for_stmt (stmt);
3952 gsi_replace (&gsi, new_stmt, false);
3953 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3954 }
3955 }
3957 /* Generate vector code for all SLP instances in the loop/basic block. */
3959 bool
3960 vect_schedule_slp (vec_info *vinfo)
3961 {
3962 vec<slp_instance> slp_instances;
3963 slp_instance instance;
3964 unsigned int i;
3965 bool is_store = false;
3967 slp_instances = vinfo->slp_instances;
3968 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3969 {
3970 /* Schedule the tree of INSTANCE. */
3971 scalar_stmts_to_slp_tree_map_t *bst_map
3972 = new scalar_stmts_to_slp_tree_map_t ();
3973 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3974 instance, bst_map);
3975 delete bst_map;
3976 if (dump_enabled_p ())
3977 dump_printf_loc (MSG_NOTE, vect_location,
3978 "vectorizing stmts using SLP.\n");
3979 }
3981 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3982 {
3983 slp_tree root = SLP_INSTANCE_TREE (instance);
3984 gimple *store;
3985 unsigned int j;
3986 gimple_stmt_iterator gsi;
3988 /* Remove scalar call stmts. Do not do this for basic-block
3989 vectorization as not all uses may be vectorized.
3990 ??? Why should this be necessary? DCE should be able to
3991 remove the stmts itself.
3992 ??? For BB vectorization we can as well remove scalar
3993 stmts starting from the SLP tree root if they have no
3994 uses. */
3995 if (is_a <loop_vec_info> (vinfo))
3996 vect_remove_slp_scalar_calls (root);
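/* The loop below removes the now-dead scalar stmts of a store group:
   each store is unlinked and its stmt_vec_info freed. Instances whose
   root stmts have no data reference (e.g. reductions) stop at the
   DATA_REF check.  */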
3998 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3999 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4000 {
4001 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
4002 break;
4004 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
4005 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
4006 /* Free the attached stmt_vec_info and remove the stmt. */
4007 gsi = gsi_for_stmt (store);
4008 unlink_stmt_vdef (store);
4009 gsi_remove (&gsi, true);
4010 release_defs (store);
4011 free_stmt_vec_info (store);
4012 }
4013 }
4015 return is_store;
4016 }