PR testsuite/86649
gcc/tree-vect-slp.c

/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
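
/* For example, for a GIMPLE_ASSIGN like  a_1 = b_2 + c_3  gimple_num_ops
   is 3 and NOPS becomes 2 (the two rhs operands).  For
   a_1 = x_2 < y_3 ? b_4 : c_5  the COND_EXPR case bumps NOPS to 4: the
   embedded comparison contributes two operands in addition to the two
   arms, matching the cond_expr operand maps used in
   vect_get_and_check_slp_defs below.  */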

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
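
/* For example, given an interleaving chain of loads  a[0], a[1], a[3]
   (DR_GROUP_GAP of the second element is 1, of the third element 2),
   the place of the third load is 0 + 1 + 2 = 3: the gaps accumulate
   into the element's scalar position within the chain.  */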

/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
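
/* A worked example of the two selectors built above, assuming a vector
   type with nelts == 4 (so half_nelts == 2): indices1 is the series
   { 0, 4, 1, 5 } and indices2 is { 2, 6, 3, 7 }, i.e. the low-half and
   high-half interleaves of the two input vectors, which is the permute
   pair the duplicate_and_interleave scheme needs.  */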

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<gimple *> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && is_pattern_stmt_p (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}
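
/* To illustrate the cond_expr operand maps above: for
   a_1 = x_2 < y_3 ? b_4 : c_5  the operands are visited in the order
   { x, y, b, c } when *SWAP is 0, { y, x, b, c } when *SWAP is 1
   (comparison operands swapped) and { x, y, c, b } when *SWAP is 2
   (arms swapped, to go with the inverted comparison code).  */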

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
			       tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
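
/* As an example of the selector built above, assume a group
   { MINUS, PLUS, MINUS, PLUS } with count == 4 and group_size == 4
   (alt_stmt_code == PLUS_EXPR).  The vectorized code computes one
   all-MINUS vector M and one all-PLUS vector P, and the selector
   becomes { 0, 5, 2, 7 }: elements 0 and 2 are taken from M and
   elements 1 and 3 from P, i.e. the usual addsub pattern.  */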

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype)
	  || (nunits_vectype
	      && !vect_record_max_nunits (vinfo, stmt, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if ((gimple_call_internal_p (call_stmt)
	       && (!vectorizable_internal_fn_p
		   (gimple_call_internal_fn (call_stmt))))
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (!compatible_calls_p (as_a <gcall *> (first_stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree starting from the stmt group STMTS.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise,
   return the built SLP tree.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commuted try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  /* Do so only if the number of not successful permutes was not
	     more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  gimple *stmt = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  if (!is_gimple_assign (stmt)
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j], 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (matches[j] == !swap_not_matching)
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1)
				== (matches[j] == !swap_not_matching));
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
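
/* A small example of the recursion above: for the group
     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
   the root node holds the two stores, its single operand child holds
   the two PLUS_EXPR stmts, and that node in turn gets two children,
   one per operand: a grouped load node for b[0]/b[1] and one for
   c[0]/c[1], at which point the load case above stops the recursion.  */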

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
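
/* For example, with group_size == 4 and PERMUTATION { 2, 3, 0, 1 },
   scalar stmts { s0, s1, s2, s3 } become { s2, s3, s0, s1 }: the stmt
   at old index I moves to new index PERMUTATION[I].  */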

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
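
/* The bitmap check above accepts only permutations that are bijections
   onto { 0, ..., group_size - 1 }: for group_size == 4, { 2, 0, 3, 1 }
   is accepted while { 0, 0, 2, 3 } (duplicate index) and { 0, 1, 2, 5 }
   (index out of range) are rejected.  */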

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}

/* Find the last scalar stmt in SLP tree NODE; for the root node of an
   SLP instance this is the last store.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (vinfo_for_stmt (group2))
    = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
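
/* A worked example of the gap bookkeeping above: splitting a group of
   16 consecutive stores with DR_GROUP_GAP 0 into GROUP1_SIZE == 8 and
   group2_size == 8 gives DR_GROUP_GAP (group2) = 0 + 8 and
   DR_GROUP_GAP (group1) = 0 + 8, so each subgroup still advances by
   the original 16 scalar elements per iteration.  */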
1906 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1907 statements and a vector of NUNITS elements. */
1909 static poly_uint64
1910 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1912 return exact_div (common_multiple (nunits, group_size), group_size);
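/* For example (a sketch with constant sizes): with NUNITS == 4 and
   GROUP_SIZE == 6, common_multiple (4, 6) == 12, so the result is
   exact_div (12, 6) == 2: two scalar iterations of the group are
   needed to fill whole vectors (12 scalars in three 4-element
   vectors).  With GROUP_SIZE == 8 and NUNITS == 4 the factor is 1.  */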
1915 /* Analyze an SLP instance starting from a group of grouped stores. Call
1916 vect_build_slp_tree to build a tree of packed stmts if possible.
1917 Return FALSE if it's impossible to SLP any stmt in the loop. */
1919 static bool
1920 vect_analyze_slp_instance (vec_info *vinfo,
1921 gimple *stmt, unsigned max_tree_size)
1923 slp_instance new_instance;
1924 slp_tree node;
1925 unsigned int group_size;
1926 tree vectype, scalar_type = NULL_TREE;
1927 gimple *next;
1928 unsigned int i;
1929 vec<slp_tree> loads;
1930 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1931 vec<gimple *> scalar_stmts;
1933 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1935 scalar_type = TREE_TYPE (DR_REF (dr));
1936 vectype = get_vectype_for_scalar_type (scalar_type);
1937 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1939 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1941 gcc_assert (is_a <loop_vec_info> (vinfo));
1942 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1943 group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
1945 else
1947 gcc_assert (is_a <loop_vec_info> (vinfo));
1948 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1949 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1952 if (!vectype)
1954 if (dump_enabled_p ())
1956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1957 "Build SLP failed: unsupported data-type ");
1958 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1959 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1962 return false;
1964 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1966 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1967 scalar_stmts.create (group_size);
1968 next = stmt;
1969 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1971 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1972 while (next)
1974 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1975 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1976 scalar_stmts.safe_push (
1977 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1978 else
1979 scalar_stmts.safe_push (next);
1980 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1983 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1985 /* Collect the reduction stmts and store them in
1986 SLP_TREE_SCALAR_STMTS. */
1987 while (next)
1989 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1990 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1991 scalar_stmts.safe_push (
1992 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1993 else
1994 scalar_stmts.safe_push (next);
1995 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1997 /* Mark the first element of the reduction chain as reduction to properly
1998 transform the node. In the reduction analysis phase only the last
1999 element of the chain is marked as reduction. */
2000 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2002 else
2004 /* Collect reduction statements. */
2005 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2006 for (i = 0; reductions.iterate (i, &next); i++)
2007 scalar_stmts.safe_push (next);
2010 loads.create (group_size);
2012 /* Build the tree for the SLP instance. */
2013 bool *matches = XALLOCAVEC (bool, group_size);
2014 unsigned npermutes = 0;
2015 bst_fail = new scalar_stmts_set_t ();
2016 poly_uint64 max_nunits = nunits;
2017 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2018 &max_nunits, &loads, matches, &npermutes,
2019 NULL, max_tree_size);
2020 delete bst_fail;
2021 if (node != NULL)
2023 /* Calculate the unrolling factor based on the smallest type. */
2024 poly_uint64 unrolling_factor
2025 = calculate_unrolling_factor (max_nunits, group_size);
2027 if (maybe_ne (unrolling_factor, 1U)
2028 && is_a <bb_vec_info> (vinfo))
2030 unsigned HOST_WIDE_INT const_max_nunits;
2031 if (!max_nunits.is_constant (&const_max_nunits)
2032 || const_max_nunits > group_size)
2034 if (dump_enabled_p ())
2035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2036 "Build SLP failed: store group "
2037 "size not a multiple of the vector size "
2038 "in basic block SLP\n");
2039 vect_free_slp_tree (node);
2040 loads.release ();
2041 return false;
2043 /* Fatal mismatch. */
2044 matches[group_size / const_max_nunits * const_max_nunits] = false;
2045 vect_free_slp_tree (node);
2046 loads.release ();
2048 else
2050 /* Create a new SLP instance. */
2051 new_instance = XNEW (struct _slp_instance);
2052 SLP_INSTANCE_TREE (new_instance) = node;
2053 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2054 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2055 SLP_INSTANCE_LOADS (new_instance) = loads;
2057 /* Compute the load permutation. */
2058 slp_tree load_node;
2059 bool loads_permuted = false;
2060 FOR_EACH_VEC_ELT (loads, i, load_node)
2062 vec<unsigned> load_permutation;
2063 int j;
2064 gimple *load, *first_stmt;
2065 bool this_load_permuted = false;
2066 load_permutation.create (group_size);
2067 first_stmt = DR_GROUP_FIRST_ELEMENT
2068 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2069 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2071 int load_place = vect_get_place_in_interleaving_chain
2072 (load, first_stmt);
2073 gcc_assert (load_place != -1);
2074 if (load_place != j)
2075 this_load_permuted = true;
2076 load_permutation.safe_push (load_place);
2078 if (!this_load_permuted
2079 /* The load requires permutation when unrolling exposes
2080 a gap either because the group is larger than the SLP
2081 group-size or because there is a gap between the groups. */
2082 && (known_eq (unrolling_factor, 1U)
2083 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2084 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2086 load_permutation.release ();
2087 continue;
2089 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2090 loads_permuted = true;
2093 if (loads_permuted)
2095 if (!vect_supported_load_permutation_p (new_instance))
2097 if (dump_enabled_p ())
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2100 "Build SLP failed: unsupported load "
2101 "permutation ");
2102 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2103 TDF_SLIM, stmt, 0);
2105 vect_free_slp_instance (new_instance);
2106 return false;
2110 /* If the loads and stores can be handled with load/store-lanes
2111 instructions, do not generate this SLP instance. */
2112 if (is_a <loop_vec_info> (vinfo)
2113 && loads_permuted
2114 && dr && vect_store_lanes_supported (vectype, group_size, false))
2116 slp_tree load_node;
2117 FOR_EACH_VEC_ELT (loads, i, load_node)
2119 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
2120 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2121 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2122 /* Use SLP for strided accesses (or if we
2123 can't load-lanes). */
2124 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2125 || ! vect_load_lanes_supported
2126 (STMT_VINFO_VECTYPE (stmt_vinfo),
2127 DR_GROUP_SIZE (stmt_vinfo), false))
2128 break;
2130 if (i == loads.length ())
2132 if (dump_enabled_p ())
2133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2134 "Built SLP cancelled: can use "
2135 "load/store-lanes\n");
2136 vect_free_slp_instance (new_instance);
2137 return false;
2141 vinfo->slp_instances.safe_push (new_instance);
2143 if (dump_enabled_p ())
2145 dump_printf_loc (MSG_NOTE, vect_location,
2146 "Final SLP tree for instance:\n");
2147 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2150 return true;
2153 else
2155 /* Failed to SLP. */
2156 /* Free the allocated memory. */
2157 scalar_stmts.release ();
2158 loads.release ();
2161 /* For basic block SLP, try to break the group up into multiples of the
2162 vector size. */
2163 unsigned HOST_WIDE_INT const_nunits;
2164 if (is_a <bb_vec_info> (vinfo)
2165 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2166 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2167 && nunits.is_constant (&const_nunits))
2169 /* We consider breaking the group only on VF boundaries from the existing
2170 start. */
2171 for (i = 0; i < group_size; i++)
2172 if (!matches[i]) break;
2174 if (i >= const_nunits && i < group_size)
2176 /* Split into two groups at the first vector boundary before i. */
2177 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2178 unsigned group1_size = i & ~(const_nunits - 1);
2180 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2181 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2182 /* If the first non-match was in the middle of a vector,
2183 skip the rest of that vector. */
2184 if (group1_size < i)
2186 i = group1_size + const_nunits;
2187 if (i < group_size)
2188 rest = vect_split_slp_store_group (rest, const_nunits);
2190 if (i < group_size)
2191 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2192 return res;
2194 /* Even though the first vector did not all match, we might be able to SLP
2195 (some) of the remainder. FORNOW ignore this possibility. */
2198 return false;
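/* Illustration of the splitting path above (hypothetical numbers):
   with GROUP_SIZE == 7, CONST_NUNITS == 4 and the first mismatch at
   i == 5, GROUP1_SIZE becomes 5 & ~3 == 4, so stores 0..3 are split
   off and analyzed as their own instance; since the mismatch was
   mid-vector we then skip to i == 4 + 4 == 8 >= GROUP_SIZE and do
   not try the remainder.  */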
2202 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2203 trees of packed scalar stmts if SLP is possible. */
2205 bool
2206 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2208 unsigned int i;
2209 gimple *first_element;
2211 DUMP_VECT_SCOPE ("vect_analyze_slp");
2213 /* Find SLP sequences starting from groups of grouped stores. */
2214 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2215 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2217 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2219 if (loop_vinfo->reduction_chains.length () > 0)
2221 /* Find SLP sequences starting from reduction chains. */
2222 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2223 if (! vect_analyze_slp_instance (vinfo, first_element,
2224 max_tree_size))
2226 /* Dissolve reduction chain group. */
2227 gimple *next, *stmt = first_element;
2228 while (stmt)
2230 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2231 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2232 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2233 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2234 stmt = next;
2236 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2237 = vect_internal_def;
2241 /* Find SLP sequences starting from groups of reductions. */
2242 if (loop_vinfo->reductions.length () > 1)
2243 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2244 max_tree_size);
2247 return true;
2251 /* For each possible SLP instance decide whether to SLP it and calculate the overall
2252 unrolling factor needed to SLP the loop. Return TRUE if we decided to SLP at
2253 least one instance. */
2255 bool
2256 vect_make_slp_decision (loop_vec_info loop_vinfo)
2258 unsigned int i;
2259 poly_uint64 unrolling_factor = 1;
2260 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2261 slp_instance instance;
2262 int decided_to_slp = 0;
2264 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2266 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2268 /* FORNOW: SLP if you can. */
2269 /* All unroll factors have the form current_vector_size * X for some
2270 rational X, so they must have a common multiple. */
2271 unrolling_factor
2272 = force_common_multiple (unrolling_factor,
2273 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2275 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2276 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2277 loop-based vectorization. Such stmts will be marked as HYBRID. */
2278 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2279 decided_to_slp++;
2282 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2284 if (decided_to_slp && dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE, vect_location,
2287 "Decided to SLP %d instances. Unrolling factor ",
2288 decided_to_slp);
2289 dump_dec (MSG_NOTE, unrolling_factor);
2290 dump_printf (MSG_NOTE, "\n");
2293 return (decided_to_slp > 0);
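/* E.g. (made-up factors): two instances with unrolling factors 2 and 4
   give force_common_multiple (2, 4) == 4 as the SLP unrolling factor
   recorded for the whole loop.  */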
2297 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2298 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2300 static void
2301 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2303 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2304 imm_use_iterator imm_iter;
2305 gimple *use_stmt;
2306 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2307 slp_tree child;
2308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2309 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2310 int j;
2312 /* Propagate hybrid down the SLP tree. */
2313 if (stype == hybrid)
2315 else if (HYBRID_SLP_STMT (stmt_vinfo))
2316 stype = hybrid;
2317 else
2319 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2320 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2321 /* If we get a pattern stmt here we have to use the LHS of the
2322 original stmt for immediate uses. */
2323 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2324 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2325 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2326 tree def;
2327 if (gimple_code (stmt) == GIMPLE_PHI)
2328 def = gimple_phi_result (stmt);
2329 else
2330 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2331 if (def)
2332 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2334 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2335 continue;
2336 use_vinfo = vinfo_for_stmt (use_stmt);
2337 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2338 && STMT_VINFO_RELATED_STMT (use_vinfo))
2339 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2340 if (!STMT_SLP_TYPE (use_vinfo)
2341 && (STMT_VINFO_RELEVANT (use_vinfo)
2342 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2343 && !(gimple_code (use_stmt) == GIMPLE_PHI
2344 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2346 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2349 "def in non-SLP stmt: ");
2350 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2352 stype = hybrid;
2357 if (stype == hybrid
2358 && !HYBRID_SLP_STMT (stmt_vinfo))
2360 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2363 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2365 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2368 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2369 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2370 vect_detect_hybrid_slp_stmts (child, i, stype);
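/* Hypothetical GIMPLE illustrating the detection above:

     _1 = a[i_5];       <-- pure SLP load
     b[i_5] = _1;       <-- pure SLP store
     _2 = _1 + x_7;     <-- stmt outside any SLP instance

   The use of _1 in the non-SLP statement forces _1 = a[i_5] to be
   marked hybrid: it has to be vectorized both as part of the SLP
   instance and for the loop-based uses.  */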
2373 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2375 static tree
2376 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2378 walk_stmt_info *wi = (walk_stmt_info *)data;
2379 struct loop *loopp = (struct loop *)wi->info;
2381 if (wi->is_lhs)
2382 return NULL_TREE;
2384 if (TREE_CODE (*tp) == SSA_NAME
2385 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2387 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2388 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2389 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2391 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2394 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2396 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2400 return NULL_TREE;
2403 static tree
2404 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2405 walk_stmt_info *)
2407 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2408 /* If the stmt is in an SLP instance then this isn't a reason
2409 to mark use definitions in other SLP instances as hybrid. */
2410 if (! STMT_SLP_TYPE (use_vinfo)
2411 && (STMT_VINFO_RELEVANT (use_vinfo)
2412 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2413 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2414 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2416 else
2417 *handled = true;
2418 return NULL_TREE;
2421 /* Find stmts that must be both vectorized and SLPed. */
2423 void
2424 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2426 unsigned int i;
2427 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2428 slp_instance instance;
2430 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2432 /* First walk all pattern stmts in the loop and mark the defs of their uses as
2433 hybrid, because immediate uses inside pattern stmts are not recorded. */
2434 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2436 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2437 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2438 gsi_next (&gsi))
2440 gimple *stmt = gsi_stmt (gsi);
2441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2442 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2444 walk_stmt_info wi;
2445 memset (&wi, 0, sizeof (wi));
2446 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2447 gimple_stmt_iterator gsi2
2448 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2449 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2450 vect_detect_hybrid_slp_1, &wi);
2451 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2452 vect_detect_hybrid_slp_2,
2453 vect_detect_hybrid_slp_1, &wi);
2458 /* Then walk the SLP instance trees marking stmts with uses in
2459 non-SLP stmts as hybrid, also propagating hybrid down the
2460 SLP tree, collecting the above info on-the-fly. */
2461 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2463 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2464 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2465 i, pure_slp);
2470 /* Initialize a bb_vec_info struct for the statements between
2471 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2473 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2474 gimple_stmt_iterator region_end_in,
2475 vec_info_shared *shared)
2476 : vec_info (vec_info::bb, init_cost (NULL), shared),
2477 bb (gsi_bb (region_begin_in)),
2478 region_begin (region_begin_in),
2479 region_end (region_end_in)
2481 gimple_stmt_iterator gsi;
2483 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2484 gsi_next (&gsi))
2486 gimple *stmt = gsi_stmt (gsi);
2487 gimple_set_uid (stmt, 0);
2488 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2491 bb->aux = this;
2495 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2496 stmts in the basic block. */
2498 _bb_vec_info::~_bb_vec_info ()
2500 for (gimple_stmt_iterator si = region_begin;
2501 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2503 gimple *stmt = gsi_stmt (si);
2504 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2506 if (stmt_info)
2507 /* Free stmt_vec_info. */
2508 free_stmt_vec_info (stmt);
2510 /* Reset region marker. */
2511 gimple_set_uid (stmt, -1);
2514 bb->aux = NULL;
2517 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2518 given that its child nodes have already been processed and that
2519 their def types currently match their SLP node's def type. */
2521 static bool
2522 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2523 slp_instance node_instance,
2524 stmt_vector_for_cost *cost_vec)
2526 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2527 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2528 gcc_assert (stmt_info);
2529 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2531 /* For BB vectorization vector types are assigned here.
2532 Memory accesses already got their vector type assigned
2533 in vect_analyze_data_refs. */
2534 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2535 if (bb_vinfo
2536 && ! STMT_VINFO_DATA_REF (stmt_info))
2538 tree vectype, nunits_vectype;
2539 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2540 &nunits_vectype))
2541 /* We checked this when building the node. */
2542 gcc_unreachable ();
2543 if (vectype == boolean_type_node)
2545 vectype = vect_get_mask_type_for_stmt (stmt_info);
2546 if (!vectype)
2547 /* vect_get_mask_type_for_stmt has already explained the
2548 failure. */
2549 return false;
2552 gimple *sstmt;
2553 unsigned int i;
2554 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2555 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2558 /* Calculate the number of vector statements to be created for the
2559 scalar stmts in this node. For SLP reductions it is equal to the
2560 number of vector statements in the children (which has already been
2561 calculated by the recursive call). Otherwise it is the number of
2562 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2563 VF divided by the number of elements in a vector. */
2564 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2565 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2566 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2567 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2568 else
2570 poly_uint64 vf;
2571 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2572 vf = loop_vinfo->vectorization_factor;
2573 else
2574 vf = 1;
2575 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2576 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2577 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2578 = vect_get_num_vectors (vf * group_size, vectype);
2581 bool dummy;
2582 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
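/* Sketch of the computation above, assuming constant sizes: for a loop
   with VF == 4, an SLP group of GROUP_SIZE == 2 and a vectype holding
   4 elements per vector,
     SLP_TREE_NUMBER_OF_VEC_STMTS == vect_get_num_vectors (4 * 2, vectype) == 2,
   i.e. the 8 scalar elements produced per vector iteration fill two
   vector stmts.  */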
2585 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2586 the subtree. NODE_INSTANCE is the SLP instance that contains NODE and VINFO contains NODE_INSTANCE.
2588 Return true if the operations are supported. */
2590 static bool
2591 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2592 slp_instance node_instance,
2593 scalar_stmts_to_slp_tree_map_t *visited,
2594 scalar_stmts_to_slp_tree_map_t *lvisited,
2595 stmt_vector_for_cost *cost_vec)
2597 int i, j;
2598 slp_tree child;
2600 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2601 return true;
2603 /* If we already analyzed the exact same set of scalar stmts we're done.
2604 We share the generated vector stmts for those. */
2605 slp_tree *leader;
2606 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2607 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2609 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2610 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2611 return true;
2614 /* The SLP graph is acyclic, so we need not cache whether we failed or
2615 succeeded: when analysis fails, the whole lvisited set is thrown
2616 away, so failed nodes never leak into the visited set. */
2617 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2619 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2620 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2621 visited, lvisited, cost_vec))
2622 return false;
2624 /* Push SLP node def-type to stmt operands. */
2625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2626 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2627 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2628 = SLP_TREE_DEF_TYPE (child);
2629 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2630 cost_vec);
2631 /* Restore def-types. */
2632 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2633 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2634 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2635 = vect_internal_def;
2636 if (! res)
2637 return false;
2639 return true;
2643 /* Analyze statements in SLP instances of VINFO. Return true if the
2644 operations are supported. */
2646 bool
2647 vect_slp_analyze_operations (vec_info *vinfo)
2649 slp_instance instance;
2650 int i;
2652 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2654 scalar_stmts_to_slp_tree_map_t *visited
2655 = new scalar_stmts_to_slp_tree_map_t ();
2656 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2658 scalar_stmts_to_slp_tree_map_t lvisited;
2659 stmt_vector_for_cost cost_vec;
2660 cost_vec.create (2);
2661 if (!vect_slp_analyze_node_operations (vinfo,
2662 SLP_INSTANCE_TREE (instance),
2663 instance, visited, &lvisited,
2664 &cost_vec))
2666 dump_printf_loc (MSG_NOTE, vect_location,
2667 "removing SLP instance operations starting from: ");
2668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2669 SLP_TREE_SCALAR_STMTS
2670 (SLP_INSTANCE_TREE (instance))[0], 0);
2671 vect_free_slp_instance (instance);
2672 vinfo->slp_instances.ordered_remove (i);
2673 cost_vec.release ();
2675 else
2677 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2678 x != lvisited.end(); ++x)
2679 visited->put ((*x).first.copy (), (*x).second);
2680 i++;
2682 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2683 cost_vec.release ();
2686 delete visited;
2688 return !vinfo->slp_instances.is_empty ();
2692 /* Compute the scalar cost of the SLP node NODE and its children
2693 and record it in COST_VEC. Do not account for defs that are marked
2694 in LIFE, and update LIFE according to the uses of NODE. */
2696 static void
2697 vect_bb_slp_scalar_cost (basic_block bb,
2698 slp_tree node, vec<bool, va_heap> *life,
2699 stmt_vector_for_cost *cost_vec)
2701 unsigned i;
2702 gimple *stmt;
2703 slp_tree child;
2705 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2707 ssa_op_iter op_iter;
2708 def_operand_p def_p;
2709 stmt_vec_info stmt_info;
2711 if ((*life)[i])
2712 continue;
2714 /* If there is a non-vectorized use of the defs then the scalar
2715 stmt is kept live in which case we do not account it or any
2716 required defs in the SLP children in the scalar cost. This
2717 way we make the vectorization more costly when compared to
2718 the scalar cost. */
2719 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2721 imm_use_iterator use_iter;
2722 gimple *use_stmt;
2723 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2724 if (!is_gimple_debug (use_stmt)
2725 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2726 use_stmt)
2727 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2729 (*life)[i] = true;
2730 BREAK_FROM_IMM_USE_STMT (use_iter);
2733 if ((*life)[i])
2734 continue;
2736 /* Count scalar stmts only once. */
2737 if (gimple_visited_p (stmt))
2738 continue;
2739 gimple_set_visited (stmt, true);
2741 stmt_info = vinfo_for_stmt (stmt);
2742 vect_cost_for_stmt kind;
2743 if (STMT_VINFO_DATA_REF (stmt_info))
2745 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2746 kind = scalar_load;
2747 else
2748 kind = scalar_store;
2750 else
2751 kind = scalar_stmt;
2752 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2755 auto_vec<bool, 20> subtree_life;
2756 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2758 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2760 /* Do not directly pass LIFE to the recursive call, copy it to
2761 confine changes in the callee to the current child/subtree. */
2762 subtree_life.safe_splice (*life);
2763 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2764 subtree_life.truncate (0);
2769 /* Check if vectorization of the basic block is profitable. */
2771 static bool
2772 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2774 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2775 slp_instance instance;
2776 int i;
2777 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2778 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2780 /* Calculate scalar cost. */
2781 stmt_vector_for_cost scalar_costs;
2782 scalar_costs.create (0);
2783 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2785 auto_vec<bool, 20> life;
2786 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2787 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2788 SLP_INSTANCE_TREE (instance),
2789 &life, &scalar_costs);
2791 void *target_cost_data = init_cost (NULL);
2792 add_stmt_costs (target_cost_data, &scalar_costs);
2793 scalar_costs.release ();
2794 unsigned dummy;
2795 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2796 destroy_cost_data (target_cost_data);
2798 /* Unset visited flag. */
2799 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2800 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2801 gimple_set_visited (gsi_stmt (gsi), false);
2803 /* Complete the target-specific cost calculation. */
2804 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2805 &vec_inside_cost, &vec_epilogue_cost);
2807 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2809 if (dump_enabled_p ())
2811 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2812 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2813 vec_inside_cost);
2814 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2815 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2816 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2819 /* Vectorization is profitable if its cost does not exceed the cost of the
2820 scalar version. Note that we err on the vector side for equal cost because
2821 the cost estimate is otherwise quite pessimistic (constant uses are
2822 free on the scalar side but cost a load on the vector side for
2823 example). */
2824 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2825 return false;
2827 return true;
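/* For instance (made-up costs): with vec_inside_cost == 4,
   vec_prologue_cost == 1 and vec_epilogue_cost == 1 the vector total
   is 6, so a scalar_cost of 6 still vectorizes (we err on the vector
   side for equal cost) while a scalar_cost of 5 does not.  */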
2830 /* Check if the basic block can be vectorized. Return a bb_vec_info
2831 if so, and set FATAL to true if failure is independent of
2832 current_vector_size. */
2834 static bb_vec_info
2835 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2836 gimple_stmt_iterator region_end,
2837 vec<data_reference_p> datarefs, int n_stmts,
2838 bool &fatal, vec_info_shared *shared)
2840 bb_vec_info bb_vinfo;
2841 slp_instance instance;
2842 int i;
2843 poly_uint64 min_vf = 2;
2845 /* The first group of checks is independent of the vector size. */
2846 fatal = true;
2848 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2852 "not vectorized: too many instructions in "
2853 "basic block.\n");
2854 free_data_refs (datarefs);
2855 return NULL;
2858 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2859 if (!bb_vinfo)
2860 return NULL;
2862 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2863 bb_vinfo->shared->save_datarefs ();
2865 /* Analyze the data references. */
2867 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2869 if (dump_enabled_p ())
2870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2871 "not vectorized: unhandled data-ref in basic "
2872 "block.\n");
2874 delete bb_vinfo;
2875 return NULL;
2878 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2880 if (dump_enabled_p ())
2881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2882 "not vectorized: not enough data-refs in "
2883 "basic block.\n");
2885 delete bb_vinfo;
2886 return NULL;
2889 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2893 "not vectorized: unhandled data access in "
2894 "basic block.\n");
2896 delete bb_vinfo;
2897 return NULL;
2900 /* If there are no grouped stores in the region there is no need
2901 to continue with pattern recog as vect_analyze_slp will fail
2902 anyway. */
2903 if (bb_vinfo->grouped_stores.is_empty ())
2905 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2907 "not vectorized: no grouped stores in "
2908 "basic block.\n");
2910 delete bb_vinfo;
2911 return NULL;
2914 /* The rest of the analysis below depends on the vector size in some way, so failures from here on are no longer fatal. */
2915 fatal = false;
2917 vect_pattern_recog (bb_vinfo);
2919 /* Check the SLP opportunities in the basic block, analyze and build SLP
2920 trees. */
2921 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2923 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926 "Failed to SLP the basic block.\n");
2927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2928 "not vectorized: failed to find SLP opportunities "
2929 "in basic block.\n");
2932 delete bb_vinfo;
2933 return NULL;
2936 vect_record_base_alignments (bb_vinfo);
2938 /* Analyze and verify the alignment of data references and the
2939 dependence in the SLP instances. */
2940 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2942 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2943 || ! vect_slp_analyze_instance_dependence (instance))
2945 dump_printf_loc (MSG_NOTE, vect_location,
2946 "removing SLP instance operations starting from: ");
2947 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2948 SLP_TREE_SCALAR_STMTS
2949 (SLP_INSTANCE_TREE (instance))[0], 0);
2950 vect_free_slp_instance (instance);
2951 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2952 continue;
2955 /* Mark all the statements that we want to vectorize as pure SLP and
2956 relevant. */
2957 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2958 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2960 i++;
2962 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2964 delete bb_vinfo;
2965 return NULL;
2968 if (!vect_slp_analyze_operations (bb_vinfo))
2970 if (dump_enabled_p ())
2971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2972 "not vectorized: bad operation in basic block.\n");
2974 delete bb_vinfo;
2975 return NULL;
2978 /* Cost model: check if the vectorization is worthwhile. */
2979 if (!unlimited_cost_model (NULL)
2980 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2982 if (dump_enabled_p ())
2983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2984 "not vectorized: vectorization is not "
2985 "profitable.\n");
2987 delete bb_vinfo;
2988 return NULL;
2991 if (dump_enabled_p ())
2992 dump_printf_loc (MSG_NOTE, vect_location,
2993 "Basic block will be vectorized using SLP\n");
2995 return bb_vinfo;
2999 /* Main entry for the BB vectorizer. Analyze and transform BB; return
3000 true if anything in the basic block was vectorized. */
3002 bool
3003 vect_slp_bb (basic_block bb)
3005 bb_vec_info bb_vinfo;
3006 gimple_stmt_iterator gsi;
3007 bool any_vectorized = false;
3008 auto_vector_sizes vector_sizes;
3010 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3012 /* Autodetect the first vector size we try. */
3013 current_vector_size = 0;
3014 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3015 unsigned int next_size = 0;
3017 gsi = gsi_start_bb (bb);
3019 poly_uint64 autodetected_vector_size = 0;
3020 while (1)
3022 if (gsi_end_p (gsi))
3023 break;
3025 gimple_stmt_iterator region_begin = gsi;
3026 vec<data_reference_p> datarefs = vNULL;
3027 int insns = 0;
3029 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3031 gimple *stmt = gsi_stmt (gsi);
3032 if (is_gimple_debug (stmt))
3033 continue;
3034 insns++;
3036 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3037 vect_location = stmt;
3039 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3040 break;
3043 /* Skip leading unhandled stmts. */
3044 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3046 gsi_next (&gsi);
3047 continue;
3050 gimple_stmt_iterator region_end = gsi;
3052 bool vectorized = false;
3053 bool fatal = false;
3054 vec_info_shared shared;
3055 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3056 datarefs, insns, fatal, &shared);
3057 if (bb_vinfo
3058 && dbg_cnt (vect_slp))
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3063 bb_vinfo->shared->check_datarefs ();
3064 vect_schedule_slp (bb_vinfo);
3066 unsigned HOST_WIDE_INT bytes;
3067 if (current_vector_size.is_constant (&bytes))
3068 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3069 "basic block part vectorized using "
3070 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3071 "vectors\n", bytes);
3072 else
3073 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3074 "basic block part vectorized using variable "
3075 "length vectors\n");
3077 vectorized = true;
3079 delete bb_vinfo;
3081 any_vectorized |= vectorized;
3083 if (next_size == 0)
3084 autodetected_vector_size = current_vector_size;
3086 if (next_size < vector_sizes.length ()
3087 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3088 next_size += 1;
3090 if (vectorized
3091 || next_size == vector_sizes.length ()
3092 || known_eq (current_vector_size, 0U)
3093 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3094 vector sizes will fail do not bother iterating. */
3095 || fatal)
3097 if (gsi_end_p (region_end))
3098 break;
3100 /* Skip the unhandled stmt. */
3101 gsi_next (&gsi);
3103 /* And reset vector sizes. */
3104 current_vector_size = 0;
3105 next_size = 0;
3107 else
3109 /* Try the next biggest vector size. */
3110 current_vector_size = vector_sizes[next_size++];
3111 if (dump_enabled_p ())
3113 dump_printf_loc (MSG_NOTE, vect_location,
3114 "***** Re-trying analysis with "
3115 "vector size ");
3116 dump_dec (MSG_NOTE, current_vector_size);
3117 dump_printf (MSG_NOTE, "\n");
3120 /* Start over. */
3121 gsi = region_begin;
3125 return any_vectorized;
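/* Sketch of the size iteration (the set of sizes is target-specific):
   on a target whose autovectorize_vector_sizes hook offers 32- and
   16-byte vectors and autodetects 32, a region whose analysis fails
   non-fatally at 32 bytes is re-analyzed with current_vector_size ==
   16 before the scan moves past the offending statement.  */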
3129 /* Return true if the vector type of the boolean constant that is operand
3130 OPNUM of statement STMT is a boolean vector. */
3132 static bool
3133 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3135 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3136 enum tree_code code = gimple_expr_code (stmt);
3137 tree op, vectype;
3138 enum vect_def_type dt;
3140 /* For comparisons and COND_EXPR the type is chosen depending
3141 on the other comparison operand. */
3142 if (TREE_CODE_CLASS (code) == tcc_comparison)
3144 if (opnum)
3145 op = gimple_assign_rhs1 (stmt);
3146 else
3147 op = gimple_assign_rhs2 (stmt);
3149 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3150 gcc_unreachable ();
3152 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3155 if (code == COND_EXPR)
3157 tree cond = gimple_assign_rhs1 (stmt);
3159 if (TREE_CODE (cond) == SSA_NAME)
3160 op = cond;
3161 else if (opnum)
3162 op = TREE_OPERAND (cond, 1);
3163 else
3164 op = TREE_OPERAND (cond, 0);
3166 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3167 gcc_unreachable ();
3169 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3172 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3175 /* Build a variable-length vector in which the elements in ELTS are repeated
3176 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3177 RESULTS and add any new instructions to SEQ.
3179 The approach we use is:
3181 (1) Find a vector mode VM with integer elements of mode IM.
3183 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3184 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3185 from small vectors to IM.
3187 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3189 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3190 correct byte contents.
3192 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3194 We try to find the largest IM for which this sequence works, in order
3195 to cut down on the number of interleaves. */
3197 void
3198 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3199 unsigned int nresults, vec<tree> &results)
3201 unsigned int nelts = elts.length ();
3202 tree element_type = TREE_TYPE (vector_type);
3204 /* (1) Find a vector mode VM with integer elements of mode IM. */
3205 unsigned int nvectors = 1;
3206 tree new_vector_type;
3207 tree permutes[2];
3208 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3209 &nvectors, &new_vector_type,
3210 permutes))
3211 gcc_unreachable ();
3213 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3214 unsigned int partial_nelts = nelts / nvectors;
3215 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3217 tree_vector_builder partial_elts;
3218 auto_vec<tree, 32> pieces (nvectors * 2);
3219 pieces.quick_grow (nvectors * 2);
3220 for (unsigned int i = 0; i < nvectors; ++i)
3222 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3223 ELTS' has mode IM. */
3224 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3225 for (unsigned int j = 0; j < partial_nelts; ++j)
3226 partial_elts.quick_push (elts[i * partial_nelts + j]);
3227 tree t = gimple_build_vector (seq, &partial_elts);
3228 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3229 TREE_TYPE (new_vector_type), t);
3231 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3232 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3235 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3236 correct byte contents.
3238 We need to repeat the following operation log2(nvectors) times:
3240 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3241 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3243 However, if each input repeats every N elements and the VF is
3244 a multiple of N * 2, the HI result is the same as the LO. */
3245 unsigned int in_start = 0;
3246 unsigned int out_start = nvectors;
3247 unsigned int hi_start = nvectors / 2;
3248 /* A bound on the number of outputs needed to produce NRESULTS results
3249 in the final iteration. */
3250 unsigned int noutputs_bound = nvectors * nresults;
3251 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3253 noutputs_bound /= 2;
3254 unsigned int limit = MIN (noutputs_bound, nvectors);
3255 for (unsigned int i = 0; i < limit; ++i)
3257 if ((i & 1) != 0
3258 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3259 2 * in_repeat))
3261 pieces[out_start + i] = pieces[out_start + i - 1];
3262 continue;
3265 tree output = make_ssa_name (new_vector_type);
3266 tree input1 = pieces[in_start + (i / 2)];
3267 tree input2 = pieces[in_start + (i / 2) + hi_start];
3268 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3269 input1, input2,
3270 permutes[i & 1]);
3271 gimple_seq_add_stmt (seq, stmt);
3272 pieces[out_start + i] = output;
3274 std::swap (in_start, out_start);
3277 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3278 results.reserve (nresults);
3279 for (unsigned int i = 0; i < nresults; ++i)
3280 if (i < nvectors)
3281 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3282 pieces[in_start + i]));
3283 else
3284 results.quick_push (results[i - nvectors]);
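/* Worked example (hypothetical modes): for ELTS = { a, b } and
   NVECTORS == 2, step (3) builds
     pieces[0] = { a, a, a, ... }    pieces[1] = { b, b, b, ... }
   and the single round of step (4) interleaves them into
     { a, b, a, b, ... }
   which step (5) VIEW_CONVERTs to VECTOR_TYPE.  */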
3288 /* For constant and loop invariant defs of SLP_NODE this function returns
3289 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3290 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3291 the scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3295 static void
3296 vect_get_constant_vectors (tree op, slp_tree slp_node,
3297 vec<tree> *vec_oprnds,
3298 unsigned int op_num, unsigned int number_of_vectors)
3300 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3301 gimple *stmt = stmts[0];
3302 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3303 unsigned HOST_WIDE_INT nunits;
3304 tree vec_cst;
3305 unsigned j, number_of_places_left_in_vector;
3306 tree vector_type;
3307 tree vop;
3308 int group_size = stmts.length ();
3309 unsigned int vec_num, i;
3310 unsigned number_of_copies = 1;
3311 vec<tree> voprnds;
3312 voprnds.create (number_of_vectors);
3313 bool constant_p, is_store;
3314 tree neutral_op = NULL;
3315 enum tree_code code = gimple_expr_code (stmt);
3316 gimple_seq ctor_seq = NULL;
3317 auto_vec<tree, 16> permute_results;
3319 /* Check if vector type is a boolean vector. */
3320 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3321 && vect_mask_constant_operand_p (stmt, op_num))
3322 vector_type
3323 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3324 else
3325 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3327 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3329 is_store = true;
3330 op = gimple_assign_rhs1 (stmt);
3332 else
3333 is_store = false;
3335 gcc_assert (op);
3337 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3338 created vectors. It is greater than 1 if unrolling is performed.
3340 For example, we have two scalar operands, s1 and s2 (e.g., group of
3341 strided accesses of size two), while NUNITS is four (i.e., four scalars
3342 of this type can be packed in a vector). The output vector will contain
3343 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3344 will be 2).
3346 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3347 containing the operands.
3349 For example, NUNITS is four as before, and the group size is 8
3350 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3351 {s5, s6, s7, s8}. */
3353 /* When using duplicate_and_interleave, we just need one element for
3354 each scalar statement. */
3355 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3356 nunits = group_size;
3358 number_of_copies = nunits * number_of_vectors / group_size;
3360 number_of_places_left_in_vector = nunits;
3361 constant_p = true;
3362 tree_vector_builder elts (vector_type, nunits, 1);
3363 elts.quick_grow (nunits);
3364 bool place_after_defs = false;
3365 for (j = 0; j < number_of_copies; j++)
3367 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3369 if (is_store)
3370 op = gimple_assign_rhs1 (stmt);
3371 else
3373 switch (code)
3375 case COND_EXPR:
3377 tree cond = gimple_assign_rhs1 (stmt);
3378 if (TREE_CODE (cond) == SSA_NAME)
3379 op = gimple_op (stmt, op_num + 1);
3380 else if (op_num == 0 || op_num == 1)
3381 op = TREE_OPERAND (cond, op_num);
3382 else
3384 if (op_num == 2)
3385 op = gimple_assign_rhs2 (stmt);
3386 else
3387 op = gimple_assign_rhs3 (stmt);
3390 break;
3392 case CALL_EXPR:
3393 op = gimple_call_arg (stmt, op_num);
3394 break;
3396 case LSHIFT_EXPR:
3397 case RSHIFT_EXPR:
3398 case LROTATE_EXPR:
3399 case RROTATE_EXPR:
3400 op = gimple_op (stmt, op_num + 1);
3401 /* Unlike the other binary operators, shifts/rotates have
3402 an int shift count instead of one of the same type as
3403 the lhs, so make sure the scalar is the right type if
3404 we are dealing with vectors of
3405 long long/long/short/char. */
3406 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3407 op = fold_convert (TREE_TYPE (vector_type), op);
3408 break;
3410 default:
3411 op = gimple_op (stmt, op_num + 1);
3412 break;
3416 /* Create 'vect_ = {op0,op1,...,opn}'. */
3417 number_of_places_left_in_vector--;
3418 tree orig_op = op;
3419 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3421 if (CONSTANT_CLASS_P (op))
3423 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3425 /* Can't use VIEW_CONVERT_EXPR for booleans because
3426 of possibly different sizes of scalar value and
3427 vector element. */
3428 if (integer_zerop (op))
3429 op = build_int_cst (TREE_TYPE (vector_type), 0);
3430 else if (integer_onep (op))
3431 op = build_all_ones_cst (TREE_TYPE (vector_type));
3432 else
3433 gcc_unreachable ();
3435 else
3436 op = fold_unary (VIEW_CONVERT_EXPR,
3437 TREE_TYPE (vector_type), op);
3438 gcc_assert (op && CONSTANT_CLASS_P (op));
3440 else
3442 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3443 gimple *init_stmt;
3444 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3446 tree true_val
3447 = build_all_ones_cst (TREE_TYPE (vector_type));
3448 tree false_val
3449 = build_zero_cst (TREE_TYPE (vector_type));
3450 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3451 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3452 op, true_val,
3453 false_val);
3455 else
3457 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3458 op);
3459 init_stmt
3460 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3461 op);
3463 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3464 op = new_temp;
3467 elts[number_of_places_left_in_vector] = op;
3468 if (!CONSTANT_CLASS_P (op))
3469 constant_p = false;
3470 if (TREE_CODE (orig_op) == SSA_NAME
3471 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3472 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3473 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3474 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3475 place_after_defs = true;
3477 if (number_of_places_left_in_vector == 0)
3479 if (constant_p
3480 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3481 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3482 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3483 else
3485 if (vec_oprnds->is_empty ())
3486 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3487 number_of_vectors,
3488 permute_results);
3489 vec_cst = permute_results[number_of_vectors - j - 1];
3491 tree init;
3492 gimple_stmt_iterator gsi;
3493 if (place_after_defs)
3495 gsi = gsi_for_stmt
3496 (vect_find_last_scalar_stmt_in_slp (slp_node));
3497 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3499 else
3500 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3501 if (ctor_seq != NULL)
3503 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3504 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3505 ctor_seq = NULL;
3507 voprnds.quick_push (init);
3508 place_after_defs = false;
3509 number_of_places_left_in_vector = nunits;
3510 constant_p = true;
3511 elts.new_vector (vector_type, nunits, 1);
3512 elts.quick_grow (nunits);
3517 /* Since the vectors are created in reverse order, invert them
3518 here. */
3519 vec_num = voprnds.length ();
3520 for (j = vec_num; j != 0; j--)
3522 vop = voprnds[j - 1];
3523 vec_oprnds->quick_push (vop);
3526 voprnds.release ();
3528 /* In case VF is greater than the unrolling factor needed for the SLP
3529 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
3530 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3531 to replicate the vectors (using a neutral vector if one exists). */
3532 while (number_of_vectors > vec_oprnds->length ())
3534 tree neutral_vec = NULL;
3536 if (neutral_op)
3538 if (!neutral_vec)
3539 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3541 vec_oprnds->quick_push (neutral_vec);
3543 else
3545 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3546 vec_oprnds->quick_push (vop);
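/* Example for the loops above (hypothetical values): a group of two
   scalar operands { s1, s2 } with NUNITS == 4 and NUMBER_OF_VECTORS
   == 1 gives NUMBER_OF_COPIES == 4 * 1 / 2 == 2; walking the stmts in
   reverse twice fills ELTS back to front, producing the single vector
   { s1, s2, s1, s2 }.  */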
3552 /* Get the vectorized definitions from SLP_NODE, which contains the
3553 corresponding vectorized def-stmts. */
3555 static void
3556 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3558 tree vec_oprnd;
3559 gimple *vec_def_stmt;
3560 unsigned int i;
3562 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3564 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3566 gcc_assert (vec_def_stmt);
3567 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3568 vec_oprnd = gimple_phi_result (vec_def_stmt);
3569 else
3570 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3571 vec_oprnds->quick_push (vec_oprnd);
3576 /* Get vectorized definitions for SLP_NODE.
3577 If the scalar definitions are loop invariants or constants, collect them and
3578 call vect_get_constant_vectors() to create vector stmts.
3579 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3580 must be stored in the corresponding child of SLP_NODE, and we call
3581 vect_get_slp_vect_defs () to retrieve them. */
3583 void
3584 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3585 vec<vec<tree> > *vec_oprnds)
3587 gimple *first_stmt;
3588 int number_of_vects = 0, i;
3589 unsigned int child_index = 0;
3590 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3591 slp_tree child = NULL;
3592 vec<tree> vec_defs;
3593 tree oprnd;
3594 bool vectorized_defs;
3596 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3597 FOR_EACH_VEC_ELT (ops, i, oprnd)
3599 /* For each operand we check if it has vectorized definitions in a child
3600 node or we need to create them (for invariants and constants). We
3601 check if the LHS of the first stmt of the next child matches OPRND.
3602 If it does, we found the correct child. Otherwise, we call
3603 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
3604 this child node is checked again for the next operand. */
3605 vectorized_defs = false;
3606 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3608 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3610 /* We have to check both pattern and original def, if available. */
3611 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3613 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3614 gimple *related
3615 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3616 tree first_def_op;
3618 if (gimple_code (first_def) == GIMPLE_PHI)
3619 first_def_op = gimple_phi_result (first_def);
3620 else
3621 first_def_op = gimple_get_lhs (first_def);
3622 if (operand_equal_p (oprnd, first_def_op, 0)
3623 || (related
3624 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3626 /* The number of vector defs is determined by the number of
3627 vector statements in the node from which we get those
3628 statements. */
3629 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3630 vectorized_defs = true;
3631 child_index++;
3634 else
3635 child_index++;
3638 if (!vectorized_defs)
3640 if (i == 0)
3642 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3643 /* Number of vector stmts was calculated according to LHS in
3644 vect_schedule_slp_instance (), fix it by replacing LHS with
3645 RHS, if necessary. See vect_get_smallest_scalar_type () for
3646 details. */
3647 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3648 &rhs_size_unit);
3649 if (rhs_size_unit != lhs_size_unit)
3651 number_of_vects *= rhs_size_unit;
3652 number_of_vects /= lhs_size_unit;
3657 /* Allocate memory for vectorized defs. */
3658 vec_defs = vNULL;
3659 vec_defs.create (number_of_vects);
3661 /* For reduction defs we call vect_get_constant_vectors (), since we are
3662 looking for initial loop invariant values. */
3663 if (vectorized_defs)
3664 /* The defs are already vectorized. */
3665 vect_get_slp_vect_defs (child, &vec_defs);
3666 else
3667 /* Build vectors from scalar defs. */
3668 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3669 number_of_vects);
3671 vec_oprnds->quick_push (vec_defs);
3675 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3676 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3677 permute statements for the SLP node NODE of the SLP instance
3678 SLP_NODE_INSTANCE. */
3680 bool
3681 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3682 gimple_stmt_iterator *gsi, poly_uint64 vf,
3683 slp_instance slp_node_instance, bool analyze_only,
3684 unsigned *n_perms)
3686 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3688 tree mask_element_type = NULL_TREE, mask_type;
3689 int vec_index = 0;
3690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3691 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3692 unsigned int mask_element;
3693 machine_mode mode;
3694 unsigned HOST_WIDE_INT nunits, const_vf;
3696 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3697 return false;
3699 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
3701 mode = TYPE_MODE (vectype);
3703 /* At the moment, all permutations are represented using per-element
3704 indices, so we can't cope with variable vector lengths or
3705 vectorization factors. */
3706 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3707 || !vf.is_constant (&const_vf))
3708 return false;
3710 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3711 same size as the vector element being permuted. */
3712 mask_element_type = lang_hooks.types.type_for_mode
3713 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3714 mask_type = get_vectype_for_scalar_type (mask_element_type);
3715 vec_perm_builder mask (nunits, nunits, 1);
3716 mask.quick_grow (nunits);
3717 vec_perm_indices indices;
3719 /* Initialize the vect stmts of NODE to properly insert the generated
3720 stmts later. */
3721 if (! analyze_only)
3722 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3723 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3724 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3726 /* Generate permutation masks for every NODE. Number of masks for each NODE
3727 is equal to GROUP_SIZE.
3728 E.g., we have a group of three nodes with three loads from the same
3729 location in each node, and the vector size is 4. I.e., we have an
3730 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3731 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3732 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3733 ...
3735 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3736 The last mask is illegal since the permute operation takes only two
3737 input vectors, so mask element values cannot exceed 2 * NUNITS - 1.
3738 Hence, the last mask must be converted into {2,5,5,5}.
3739 For the first two permutations we need the first and the second input
3740 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3741 we need the second and the third vectors: {b1,c1,a2,b2} and
3742 {c2,a3,b3,c3}. */
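/* Tracing the example above through the loop below (illustrative
   values): for the a-node, SLP_TREE_LOAD_PERMUTATION is {0,0,0},
   DR_GROUP_SIZE is 3 and CONST_VF is 4, so the element stream is
   0,0,0, 3,3,3, 6,6,6, 9,9,9.  Chunked into masks of NUNITS == 4
   elements this gives {0,0,0,3} {3,3,6,6} {6,9,9,9}, matching the
   masks quoted above before the two-input correction.  */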
3744 int vect_stmts_counter = 0;
3745 unsigned int index = 0;
3746 int first_vec_index = -1;
3747 int second_vec_index = -1;
3748 bool noop_p = true;
3749 *n_perms = 0;
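/* NOOP_P tracks whether the mask built so far is the identity
   {0, 1, 2, ...}; for an identity permutation no VEC_PERM_EXPR is
   emitted and the input vector is reused directly (see below).  */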
3751 for (unsigned int j = 0; j < const_vf; j++)
3752 {
3753 for (int k = 0; k < group_size; k++)
3754 {
3755 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3756 + j * DR_GROUP_SIZE (stmt_info));
3757 vec_index = i / nunits;
3758 mask_element = i % nunits;
3759 if (vec_index == first_vec_index
3760 || first_vec_index == -1)
3761 {
3762 first_vec_index = vec_index;
3763 }
3764 else if (vec_index == second_vec_index
3765 || second_vec_index == -1)
3766 {
3767 second_vec_index = vec_index;
3768 mask_element += nunits;
3769 }
3770 else
3771 {
3772 if (dump_enabled_p ())
3773 {
3774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3775 "permutation requires at "
3776 "least three vectors ");
3777 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3778 stmt, 0);
3779 }
3780 gcc_assert (analyze_only);
3781 return false;
3782 }
3784 gcc_assert (mask_element < 2 * nunits);
3785 if (mask_element != index)
3786 noop_p = false;
3787 mask[index++] = mask_element;
3789 if (index == nunits && !noop_p)
3790 {
3791 indices.new_vector (mask, 2, nunits);
3792 if (!can_vec_perm_const_p (mode, indices))
3793 {
3794 if (dump_enabled_p ())
3795 {
3796 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3797 vect_location,
3798 "unsupported vect permute { ");
3799 for (unsigned int i = 0; i < nunits; ++i)
3800 {
3801 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3802 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3803 }
3804 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3805 }
3806 gcc_assert (analyze_only);
3807 return false;
3808 }
3810 ++*n_perms;
3811 }
3813 if (index == nunits)
3814 {
3815 if (!analyze_only)
3816 {
3817 tree mask_vec = NULL_TREE;
3819 if (! noop_p)
3820 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
3822 if (second_vec_index == -1)
3823 second_vec_index = first_vec_index;
3825 /* Generate the permute statement if necessary. */
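/* Reminder of VEC_PERM_EXPR semantics: for each output element i,
   sel[i] < NUNITS selects FIRST_VEC[sel[i]], otherwise
   SECOND_VEC[sel[i] - NUNITS].  E.g. VEC_PERM_EXPR <{a0,a1,a2,a3},
   {b0,b1,b2,b3}, {0,5,2,7}> yields {a0,b1,a2,b3}.  */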
3826 tree first_vec = dr_chain[first_vec_index];
3827 tree second_vec = dr_chain[second_vec_index];
3828 gimple *perm_stmt;
3829 if (! noop_p)
3830 {
3831 tree perm_dest
3832 = vect_create_destination_var (gimple_assign_lhs (stmt),
3833 vectype);
3834 perm_dest = make_ssa_name (perm_dest);
3835 perm_stmt = gimple_build_assign (perm_dest,
3836 VEC_PERM_EXPR,
3837 first_vec, second_vec,
3838 mask_vec);
3839 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3840 }
3841 else
3842 /* If the mask was NULL_TREE the permutation is an identity, so
3843 reuse the statement that defines the first input vector. */
3844 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3846 /* Store the vector statement in NODE. */
3847 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3848 }
3850 index = 0;
3851 first_vec_index = -1;
3852 second_vec_index = -1;
3853 noop_p = true;
3854 }
3855 }
3856 }
3858 return true;
3859 }
3861 /* Vectorize SLP instance tree in postorder. */
3863 static bool
3864 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3865 scalar_stmts_to_slp_tree_map_t *bst_map)
3866 {
3867 gimple *stmt;
3868 bool grouped_store, is_store;
3869 gimple_stmt_iterator si;
3870 stmt_vec_info stmt_info;
3871 unsigned int group_size;
3872 tree vectype;
3873 int i, j;
3874 slp_tree child;
3876 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3877 return false;
3879 /* See if we have already vectorized the same set of stmts and reuse their
3880 vectorized stmts. */
3881 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3882 {
3883 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3884 return false;
3885 }
3887 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3889 vect_schedule_slp_instance (child, instance, bst_map);
3891 /* Push SLP node def-type to stmts. */
3892 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3893 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3894 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3895 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3897 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3898 stmt_info = vinfo_for_stmt (stmt);
3900 /* VECTYPE is the type of the destination. */
3901 vectype = STMT_VINFO_VECTYPE (stmt_info);
3902 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3903 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3905 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3906 if (!SLP_TREE_VEC_STMTS (node).exists ())
3907 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3909 if (dump_enabled_p ())
3910 {
3911 dump_printf_loc (MSG_NOTE, vect_location,
3912 "------>vectorizing SLP node starting from: ");
3913 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3914 }
3916 /* Vectorized stmts go before the last scalar stmt, which is where
3917 all uses are ready. */
3918 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3920 /* Mark the first element of the reduction chain as reduction to properly
3921 transform the node. In the analysis phase only the last element of the
3922 chain is marked as reduction. */
3923 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3924 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3925 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3926 {
3927 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3928 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3929 }
3931 /* Handle two-operation SLP nodes by vectorizing the group with
3932 both operations and then performing a merge. */
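/* Worked example (values chosen for illustration): for a group
   { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } with GROUP_SIZE == 4 and
   NUNITS == 4, CODE0 is PLUS_EXPR, OCODE is MINUS_EXPR and the scalar
   mask built below is {0,1,0,1}.  The node is vectorized once as
   all-plus (v0) and once as all-minus (v1), and the merge mask
   {0*4+0, 1*4+1, 0*4+2, 1*4+3} = {0,5,2,7} selects
   {v0[0], v1[1], v0[2], v1[3]}, i.e. the addsub result.  */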
3933 if (SLP_TREE_TWO_OPERATORS (node))
3934 {
3935 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3936 enum tree_code ocode = ERROR_MARK;
3937 gimple *ostmt;
3938 vec_perm_builder mask (group_size, group_size, 1);
3939 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3940 if (gimple_assign_rhs_code (ostmt) != code0)
3941 {
3942 mask.quick_push (1);
3943 ocode = gimple_assign_rhs_code (ostmt);
3944 }
3945 else
3946 mask.quick_push (0);
3947 if (ocode != ERROR_MARK)
3948 {
3949 vec<gimple *> v0;
3950 vec<gimple *> v1;
3951 unsigned j;
3952 tree tmask = NULL_TREE;
3953 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3954 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3955 SLP_TREE_VEC_STMTS (node).truncate (0);
3956 gimple_assign_set_rhs_code (stmt, ocode);
3957 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3958 gimple_assign_set_rhs_code (stmt, code0);
3959 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3960 SLP_TREE_VEC_STMTS (node).truncate (0);
3961 tree meltype = build_nonstandard_integer_type
3962 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3963 tree mvectype = get_same_sized_vectype (meltype, vectype);
3964 unsigned k = 0, l;
3965 for (j = 0; j < v0.length (); ++j)
3966 {
3967 /* Enforced by vect_build_slp_tree, which rejects variable-length
3968 vectors for SLP_TREE_TWO_OPERATORS. */
3969 unsigned int const_nunits = nunits.to_constant ();
3970 tree_vector_builder melts (mvectype, const_nunits, 1);
3971 for (l = 0; l < const_nunits; ++l)
3972 {
3973 if (k >= group_size)
3974 k = 0;
3975 tree t = build_int_cst (meltype,
3976 mask[k++] * const_nunits + l);
3977 melts.quick_push (t);
3978 }
3979 tmask = melts.build ();
3981 /* ??? Not all targets support a VEC_PERM_EXPR with a
3982 constant mask that would translate to a vec_merge RTX
3983 (with their vec_perm_const_ok). We can either not
3984 vectorize in that case or let veclower do its job.
3985 Unfortunately that isn't too great and at least for
3986 plus/minus we'd eventually like to match targets'
3987 vector addsub instructions. */
3988 gimple *vstmt;
3989 vstmt = gimple_build_assign (make_ssa_name (vectype),
3990 VEC_PERM_EXPR,
3991 gimple_assign_lhs (v0[j]),
3992 gimple_assign_lhs (v1[j]), tmask);
3993 vect_finish_stmt_generation (stmt, vstmt, &si);
3994 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3995 }
3996 v0.release ();
3997 v1.release ();
3998 return false;
3999 }
4000 }
4001 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
4003 /* Restore stmt def-types. */
4004 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4005 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4006 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
4007 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
4009 return is_store;
4010 }
4012 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4013 For loop vectorization this is done in vectorizable_call, but for SLP
4014 it needs to be deferred until the end of vect_schedule_slp, because multiple
4015 SLP instances may refer to the same scalar stmt. */
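/* E.g. a vectorized call _1 = sqrtf (_2) is rewritten here as
   _1 = 0.0f; the zero assignment keeps the IL valid, and since all
   uses of _1 have been replaced by vector statements it is trivially
   dead and cleaned up by later DCE.  */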
4017 static void
4018 vect_remove_slp_scalar_calls (slp_tree node)
4019 {
4020 gimple *stmt, *new_stmt;
4021 gimple_stmt_iterator gsi;
4022 int i;
4023 slp_tree child;
4024 tree lhs;
4025 stmt_vec_info stmt_info;
4027 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4028 return;
4030 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4031 vect_remove_slp_scalar_calls (child);
4033 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
4034 {
4035 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
4036 continue;
4037 stmt_info = vinfo_for_stmt (stmt);
4038 if (stmt_info == NULL
4039 || is_pattern_stmt_p (stmt_info)
4040 || !PURE_SLP_STMT (stmt_info))
4041 continue;
4042 lhs = gimple_call_lhs (stmt);
4043 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4044 set_vinfo_for_stmt (new_stmt, stmt_info);
4045 set_vinfo_for_stmt (stmt, NULL);
4046 STMT_VINFO_STMT (stmt_info) = new_stmt;
4047 gsi = gsi_for_stmt (stmt);
4048 gsi_replace (&gsi, new_stmt, false);
4049 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4050 }
4051 }
4053 /* Generate vector code for all SLP instances in the loop/basic block. */
4055 bool
4056 vect_schedule_slp (vec_info *vinfo)
4057 {
4058 vec<slp_instance> slp_instances;
4059 slp_instance instance;
4060 unsigned int i;
4061 bool is_store = false;
4064 scalar_stmts_to_slp_tree_map_t *bst_map
4065 = new scalar_stmts_to_slp_tree_map_t ();
4066 slp_instances = vinfo->slp_instances;
4067 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4068 {
4069 /* Schedule the tree of INSTANCE. */
4070 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
4071 instance, bst_map);
4072 if (dump_enabled_p ())
4073 dump_printf_loc (MSG_NOTE, vect_location,
4074 "vectorizing stmts using SLP.\n");
4076 delete bst_map;
4078 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4079 {
4080 slp_tree root = SLP_INSTANCE_TREE (instance);
4081 gimple *store;
4082 unsigned int j;
4083 gimple_stmt_iterator gsi;
4085 /* Remove scalar call stmts. Do not do this for basic-block
4086 vectorization as not all uses may be vectorized.
4087 ??? Why should this be necessary? DCE should be able to
4088 remove the stmts itself.
4089 ??? For BB vectorization we can as well remove scalar
4090 stmts starting from the SLP tree root if they have no
4091 uses. */
4092 if (is_a <loop_vec_info> (vinfo))
4093 vect_remove_slp_scalar_calls (root);
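/* The loop below removes the now-redundant scalar stores of the SLP
   instance root; their effect is realized by the vector stores emitted
   during scheduling.  Breaking at the first stmt without a data
   reference handles roots that are not store groups.  */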
4095 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
4096 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4098 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
4099 break;
4101 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
4102 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
4103 /* Free the attached stmt_vec_info and remove the stmt. */
4104 gsi = gsi_for_stmt (store);
4105 unlink_stmt_vdef (store);
4106 gsi_remove (&gsi, true);
4107 release_defs (store);
4108 free_stmt_vec_info (store);
4109 }
4110 }
4112 return is_store;
4113 }