gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement: the type of its operand, its
     vector def-type, and whether the first (and second) statement is a
     pattern stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
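
/* For example (illustrative): in the chain FIRST_STMT -> S1 -> S2 where
   DR_GROUP_GAP of S1 is 1 and DR_GROUP_GAP of S2 is 2 (one element of the
   underlying access group is skipped between S1 and S2), the returned
   places are 0 for FIRST_STMT, 1 for S1 and 3 for S2.  */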

int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
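
/* For example (illustrative): with COUNT == 4, ELT_MODE == QImode and a
   16-byte current_vector_size, the four QImode values are handled as one
   32-bit integer, the intermediate vector type is V4SI, and the two
   permutes built below are the low/high interleaves { 0, 4, 1, 5 } and
   { 2, 6, 3, 7 }, giving a single intermediate vector
   (*NVECTORS_OUT == 1).  */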

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   on entry indicates that a swap is required for cond_expr stmts: *SWAP
   is 1 if STMT is a cond and the operands of its comparison need to be
   swapped; *SWAP is 2 if STMT is a cond and its comparison code needs to
   be inverted.  If this function performs any operand swap, it sets *SWAP
   to a non-zero value on return.

   Return -1 on a fatal error; return 1 if the error could be corrected by
   swapping operands of this node's father node; return 0 if everything is
   ok.  */
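
/* As an illustrative example: for a COND_EXPR stmt x = a < b ? c : d,
   *SWAP == 1 stands for swapping the comparison operands (b > a ? c : d)
   while *SWAP == 2 stands for inverting the comparison code and swapping
   the arms (a >= b ? d : c); both forms compute the same value.  */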

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<gimple *> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && is_pattern_stmt_p (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
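
/* E.g. (illustrative): in basic-block SLP with GROUP_SIZE == 2, a V4SI
   VECTYPE is rejected here, since filling a four-lane vector from a
   two-stmt group would require unrolling, which is impossible in a
   basic block.  */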

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
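
/* For instance (illustrative): for the group { a0 + b0, a1 - b1 } with a
   four-lane VECTYPE, one input vector holds all the sums and the other
   all the differences, and the permute built below selects { 0, 5, 2, 7 }:
   even elements from the first-operation vector, odd elements from the
   ALT_STMT_CODE vector.  */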

static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
			       tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation, and are not of unsupported operation types.  Return true
   if so; otherwise return false and indicate in *MATCHES which stmts are
   not isomorphic to the first one.  If MATCHES[0] is false then this
   indicates that the comparison could not be carried out or that the
   stmts will never be vectorized by SLP.

   Note a COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of its comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of its comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
   swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, poly_uint64 *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype)
	  || (nunits_vectype
	      && !vect_record_max_nunits (vinfo, stmt, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When the SLP build fails for STMTS, record this; otherwise the SLP
     build can take exponential time when we allow constructing parts
     from scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree for the group of stmts STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise,
   return the built SLP tree.  *MATCHES records which stmts matched the
   first one; MATCHES[0] set to false signals a fatal mismatch.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  gimple *stmt = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  if (!is_gimple_assign (stmt)
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j], 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (matches[j] == !swap_not_matching)
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1)
				== (matches[j] == !swap_not_matching));
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
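
/* For example (illustrative): if every load node of the instance carries
   the load permutation { 2, 0, 3, 1 }, the scalar stmts of all nodes are
   reordered so that stmt 0 moves to slot 2, stmt 1 to slot 0, stmt 2 to
   slot 3 and stmt 3 to slot 1; afterwards the loads appear in memory
   order and the permutations can be dropped.  */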

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last scalar stmt in the SLP group rooted at NODE (for grouped
   stores, the last store in the SLP instance).  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */
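
/* As an illustrative example: splitting a group of eight consecutive
   stores with GROUP1_SIZE == 4 yields two groups of four; the second
   group's DR_GROUP_GAP becomes the original group's gap plus 4 (to skip
   over the first subgroup) and the first group's DR_GROUP_GAP grows by 4
   (to skip over the second).  */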

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt;
       stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (vinfo_for_stmt (group2))
    = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */
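
/* For example: with NUNITS == 4 and GROUP_SIZE == 6, the least common
   multiple is 12, so the SLP instance needs to be unrolled 12 / 6 == 2
   times for the group to fill whole vectors.  */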

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  bst_fail = new scalar_stmts_set_t ();
  poly_uint64 max_nunits = nunits;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  delete bst_fail;
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node);
	      loads.release ();
2008 return false;
2010 /* Fatal mismatch. */
2011 matches[group_size / const_max_nunits * const_max_nunits] = false;
2012 vect_free_slp_tree (node);
2013 loads.release ();
2015 else
2017 /* Create a new SLP instance. */
2018 new_instance = XNEW (struct _slp_instance);
2019 SLP_INSTANCE_TREE (new_instance) = node;
2020 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2021 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2022 SLP_INSTANCE_LOADS (new_instance) = loads;
2024 /* Compute the load permutation. */
2025 slp_tree load_node;
2026 bool loads_permuted = false;
2027 FOR_EACH_VEC_ELT (loads, i, load_node)
2029 vec<unsigned> load_permutation;
2030 int j;
2031 gimple *load, *first_stmt;
2032 bool this_load_permuted = false;
2033 load_permutation.create (group_size);
2034 first_stmt = DR_GROUP_FIRST_ELEMENT
2035 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2036 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2038 int load_place = vect_get_place_in_interleaving_chain
2039 (load, first_stmt);
2040 gcc_assert (load_place != -1);
2041 if (load_place != j)
2042 this_load_permuted = true;
2043 load_permutation.safe_push (load_place);
2045 if (!this_load_permuted
2046 /* The load requires permutation when unrolling exposes
2047 a gap either because the group is larger than the SLP
2048 group-size or because there is a gap between the groups. */
2049 && (known_eq (unrolling_factor, 1U)
2050 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2051 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2053 load_permutation.release ();
2054 continue;
2056 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2057 loads_permuted = true;
2060 if (loads_permuted)
2062 if (!vect_supported_load_permutation_p (new_instance))
2064 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2067 "Build SLP failed: unsupported load "
2068 "permutation ");
2069 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2070 TDF_SLIM, stmt, 0);
2072 vect_free_slp_instance (new_instance);
2073 return false;
2077 /* If the loads and stores can be handled with load/store-lanes
2078 instructions, do not generate this SLP instance. */
2079 if (is_a <loop_vec_info> (vinfo)
2080 && loads_permuted
2081 && dr && vect_store_lanes_supported (vectype, group_size, false))
2083 slp_tree load_node;
2084 FOR_EACH_VEC_ELT (loads, i, load_node)
2086 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
2087 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2088 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2089 /* Use SLP for strided accesses (or if we
2090 can't load-lanes). */
2091 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2092 || ! vect_load_lanes_supported
2093 (STMT_VINFO_VECTYPE (stmt_vinfo),
2094 DR_GROUP_SIZE (stmt_vinfo), false))
2095 break;
2097 if (i == loads.length ())
2099 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2101 "Built SLP cancelled: can use "
2102 "load/store-lanes\n");
2103 vect_free_slp_instance (new_instance);
2104 return false;
2108 vinfo->slp_instances.safe_push (new_instance);
2110 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_NOTE, vect_location,
2113 "Final SLP tree for instance:\n");
2114 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2117 return true;
2120 else
2122 /* Failed to SLP. */
2123 /* Free the allocated memory. */
2124 scalar_stmts.release ();
2125 loads.release ();
2128 /* For basic block SLP, try to break the group up into multiples of the
2129 vector size. */
2130 unsigned HOST_WIDE_INT const_nunits;
2131 if (is_a <bb_vec_info> (vinfo)
2132 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2133 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2134 && nunits.is_constant (&const_nunits))
2136 /* We consider breaking the group only on VF boundaries from the existing
2137 start. */
2138 for (i = 0; i < group_size; i++)
2139 if (!matches[i]) break;
2141 if (i >= const_nunits && i < group_size)
2143 /* Split into two groups at the first vector boundary before i. */
2144 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2145 unsigned group1_size = i & ~(const_nunits - 1);
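/* E.g. (illustrative): with CONST_NUNITS == 4 and the first mismatch at
   I == 6, GROUP1_SIZE becomes 6 & ~3 == 4, the largest multiple of the
   vector size not exceeding I. */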
2147 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2148 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2149 /* If the first non-match was in the middle of a vector,
2150 skip the rest of that vector. */
2151 if (group1_size < i)
2153 i = group1_size + const_nunits;
2154 if (i < group_size)
2155 rest = vect_split_slp_store_group (rest, const_nunits);
2157 if (i < group_size)
2158 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2159 return res;
2161 /* Even though the first vector did not all match, we might be able to SLP
2162 (some) of the remainder. FORNOW ignore this possibility. */
2165 return false;
2169 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2170 trees of packed scalar stmts if SLP is possible. */
2172 bool
2173 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2175 unsigned int i;
2176 gimple *first_element;
2178 DUMP_VECT_SCOPE ("vect_analyze_slp");
2180 /* Find SLP sequences starting from groups of grouped stores. */
2181 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2182 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2184 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2186 if (loop_vinfo->reduction_chains.length () > 0)
2188 /* Find SLP sequences starting from reduction chains. */
2189 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2190 if (! vect_analyze_slp_instance (vinfo, first_element,
2191 max_tree_size))
2193 /* Dissolve reduction chain group. */
2194 gimple *next, *stmt = first_element;
2195 while (stmt)
2197 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2198 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2199 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2200 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2201 stmt = next;
2203 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2204 = vect_internal_def;
2208 /* Find SLP sequences starting from groups of reductions. */
2209 if (loop_vinfo->reductions.length () > 1)
2210 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2211 max_tree_size);
2214 return true;
2218 /* For each possible SLP instance decide whether to SLP it and calculate the
2219 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2220 to SLP at least one instance. */
2222 bool
2223 vect_make_slp_decision (loop_vec_info loop_vinfo)
2225 unsigned int i;
2226 poly_uint64 unrolling_factor = 1;
2227 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2228 slp_instance instance;
2229 int decided_to_slp = 0;
2231 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2233 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2235 /* FORNOW: SLP if you can. */
2236 /* All unroll factors have the form current_vector_size * X for some
2237 rational X, so they must have a common multiple. */
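/* E.g. (illustrative): instance unrolling factors of 2 and 3 combine to
   a loop-wide factor of 6. */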
2238 unrolling_factor
2239 = force_common_multiple (unrolling_factor,
2240 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2242 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2243 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2244 loop-based vectorization. Such stmts will be marked as HYBRID. */
2245 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2246 decided_to_slp++;
2249 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2251 if (decided_to_slp && dump_enabled_p ())
2253 dump_printf_loc (MSG_NOTE, vect_location,
2254 "Decided to SLP %d instances. Unrolling factor ",
2255 decided_to_slp);
2256 dump_dec (MSG_NOTE, unrolling_factor);
2257 dump_printf (MSG_NOTE, "\n");
2260 return (decided_to_slp > 0);
2264 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2265 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2267 static void
2268 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2270 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2271 imm_use_iterator imm_iter;
2272 gimple *use_stmt;
2273 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2274 slp_tree child;
2275 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2276 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2277 int j;
2279 /* Propagate hybrid down the SLP tree. */
2280 if (stype == hybrid)
2282 else if (HYBRID_SLP_STMT (stmt_vinfo))
2283 stype = hybrid;
2284 else
2286 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2287 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2288 /* If we get a pattern stmt here we have to use the LHS of the
2289 original stmt for immediate uses. */
2290 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2291 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2292 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2293 tree def;
2294 if (gimple_code (stmt) == GIMPLE_PHI)
2295 def = gimple_phi_result (stmt);
2296 else
2297 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2298 if (def)
2299 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2301 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2302 continue;
2303 use_vinfo = vinfo_for_stmt (use_stmt);
2304 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2305 && STMT_VINFO_RELATED_STMT (use_vinfo))
2306 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2307 if (!STMT_SLP_TYPE (use_vinfo)
2308 && (STMT_VINFO_RELEVANT (use_vinfo)
2309 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2310 && !(gimple_code (use_stmt) == GIMPLE_PHI
2311 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2313 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2316 "def in non-SLP stmt: ");
2317 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2319 stype = hybrid;
2324 if (stype == hybrid
2325 && !HYBRID_SLP_STMT (stmt_vinfo))
2327 if (dump_enabled_p ())
2329 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2330 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2332 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2335 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2336 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2337 vect_detect_hybrid_slp_stmts (child, i, stype);
2340 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2342 static tree
2343 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2345 walk_stmt_info *wi = (walk_stmt_info *)data;
2346 struct loop *loopp = (struct loop *)wi->info;
2348 if (wi->is_lhs)
2349 return NULL_TREE;
2351 if (TREE_CODE (*tp) == SSA_NAME
2352 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2354 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2355 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2356 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2358 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2361 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2363 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2367 return NULL_TREE;
2370 static tree
2371 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2372 walk_stmt_info *)
2374 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2375 /* If the stmt is in a SLP instance then this isn't a reason
2376 to mark use definitions in other SLP instances as hybrid. */
2377 if (! STMT_SLP_TYPE (use_vinfo)
2378 && (STMT_VINFO_RELEVANT (use_vinfo)
2379 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2380 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2381 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2383 else
2384 *handled = true;
2385 return NULL_TREE;
2388 /* Find stmts that must be both vectorized and SLPed. */
2390 void
2391 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2393 unsigned int i;
2394 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2395 slp_instance instance;
2397 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2399 /* First walk all pattern stmts in the loop and mark the defs of their uses
2400 as hybrid because immediate uses in pattern stmts are not recorded. */
2401 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2403 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2404 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2405 gsi_next (&gsi))
2407 gimple *stmt = gsi_stmt (gsi);
2408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2409 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2411 walk_stmt_info wi;
2412 memset (&wi, 0, sizeof (wi));
2413 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2414 gimple_stmt_iterator gsi2
2415 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2416 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2417 vect_detect_hybrid_slp_1, &wi);
2418 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2419 vect_detect_hybrid_slp_2,
2420 vect_detect_hybrid_slp_1, &wi);
2425 /* Then walk the SLP instance trees marking stmts with uses in
2426 non-SLP stmts as hybrid, also propagating hybrid down the
2427 SLP tree, collecting the above info on-the-fly. */
2428 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2430 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2431 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2432 i, pure_slp);
2437 /* Initialize a bb_vec_info struct for the statements between
2438 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2440 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2441 gimple_stmt_iterator region_end_in,
2442 vec_info_shared *shared)
2443 : vec_info (vec_info::bb, init_cost (NULL), shared),
2444 bb (gsi_bb (region_begin_in)),
2445 region_begin (region_begin_in),
2446 region_end (region_end_in)
2448 gimple_stmt_iterator gsi;
2450 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2451 gsi_next (&gsi))
2453 gimple *stmt = gsi_stmt (gsi);
2454 gimple_set_uid (stmt, 0);
2455 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2458 bb->aux = this;
2462 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2463 stmts in the basic block. */
2465 _bb_vec_info::~_bb_vec_info ()
2467 for (gimple_stmt_iterator si = region_begin;
2468 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2470 gimple *stmt = gsi_stmt (si);
2471 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2473 if (stmt_info)
2474 /* Free stmt_vec_info. */
2475 free_stmt_vec_info (stmt);
2477 /* Reset region marker. */
2478 gimple_set_uid (stmt, -1);
2481 bb->aux = NULL;
2484 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2485 given that its child nodes have already been processed, and that
2486 their def types currently match their SLP node's def type. */
2488 static bool
2489 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2490 slp_instance node_instance,
2491 stmt_vector_for_cost *cost_vec)
2493 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2495 gcc_assert (stmt_info);
2496 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2498 /* For BB vectorization vector types are assigned here.
2499 Memory accesses already got their vector type assigned
2500 in vect_analyze_data_refs. */
2501 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2502 if (bb_vinfo
2503 && ! STMT_VINFO_DATA_REF (stmt_info))
2505 tree vectype, nunits_vectype;
2506 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2507 &nunits_vectype))
2508 /* We checked this when building the node. */
2509 gcc_unreachable ();
2510 if (vectype == boolean_type_node)
2512 vectype = vect_get_mask_type_for_stmt (stmt_info);
2513 if (!vectype)
2514 /* vect_get_mask_type_for_stmt has already explained the
2515 failure. */
2516 return false;
2519 gimple *sstmt;
2520 unsigned int i;
2521 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2522 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2525 /* Calculate the number of vector statements to be created for the
2526 scalar stmts in this node. For SLP reductions it is equal to the
2527 number of vector statements in the children (which has already been
2528 calculated by the recursive call). Otherwise it is the number of
2529 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2530 VF divided by the number of elements in a vector. */
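/* For example (illustrative values): with DR_GROUP_SIZE == 8, VF == 4 and
   4-element vectors, 8 * 4 / 4 == 8 vector statements are needed. */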
2531 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2532 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2533 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2534 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2535 else
2537 poly_uint64 vf;
2538 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2539 vf = loop_vinfo->vectorization_factor;
2540 else
2541 vf = 1;
2542 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2543 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2544 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2545 = vect_get_num_vectors (vf * group_size, vectype);
2548 bool dummy;
2549 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2552 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2553 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2555 Return true if the operations are supported. */
2557 static bool
2558 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2559 slp_instance node_instance,
2560 scalar_stmts_to_slp_tree_map_t *visited,
2561 scalar_stmts_to_slp_tree_map_t *lvisited,
2562 stmt_vector_for_cost *cost_vec)
2564 int i, j;
2565 slp_tree child;
2567 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2568 return true;
2570 /* If we already analyzed the exact same set of scalar stmts we're done.
2571 We share the generated vector stmts for those. */
2572 slp_tree *leader;
2573 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2574 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2576 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2577 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2578 return true;
2581 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2582 doesn't result in any issue since we throw away the lvisited set
2583 when we fail. */
2584 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2586 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2587 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2588 visited, lvisited, cost_vec))
2589 return false;
2591 /* Push SLP node def-type to stmt operands. */
2592 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2593 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2594 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2595 = SLP_TREE_DEF_TYPE (child);
2596 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2597 cost_vec);
2598 /* Restore def-types. */
2599 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2600 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2601 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2602 = vect_internal_def;
2603 if (! res)
2604 return false;
2606 return true;
2610 /* Analyze statements in SLP instances of VINFO. Return true if the
2611 operations are supported. */
2613 bool
2614 vect_slp_analyze_operations (vec_info *vinfo)
2616 slp_instance instance;
2617 int i;
2619 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2621 scalar_stmts_to_slp_tree_map_t *visited
2622 = new scalar_stmts_to_slp_tree_map_t ();
2623 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2625 scalar_stmts_to_slp_tree_map_t lvisited;
2626 stmt_vector_for_cost cost_vec;
2627 cost_vec.create (2);
2628 if (!vect_slp_analyze_node_operations (vinfo,
2629 SLP_INSTANCE_TREE (instance),
2630 instance, visited, &lvisited,
2631 &cost_vec))
2633 dump_printf_loc (MSG_NOTE, vect_location,
2634 "removing SLP instance operations starting from: ");
2635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2636 SLP_TREE_SCALAR_STMTS
2637 (SLP_INSTANCE_TREE (instance))[0], 0);
2638 vect_free_slp_instance (instance);
2639 vinfo->slp_instances.ordered_remove (i);
2640 cost_vec.release ();
2642 else
2644 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2645 x != lvisited.end(); ++x)
2646 visited->put ((*x).first.copy (), (*x).second);
2647 i++;
2649 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2650 cost_vec.release ();
2653 delete visited;
2655 return !vinfo->slp_instances.is_empty ();
2659 /* Compute the scalar cost of the SLP node NODE and its children
2660 and record it in COST_VEC. Do not account defs that are marked in LIFE;
2661 update LIFE according to uses of NODE. */
2663 static void
2664 vect_bb_slp_scalar_cost (basic_block bb,
2665 slp_tree node, vec<bool, va_heap> *life,
2666 stmt_vector_for_cost *cost_vec)
2668 unsigned i;
2669 gimple *stmt;
2670 slp_tree child;
2672 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2674 ssa_op_iter op_iter;
2675 def_operand_p def_p;
2676 stmt_vec_info stmt_info;
2678 if ((*life)[i])
2679 continue;
2681 /* If there is a non-vectorized use of the defs then the scalar
2682 stmt is kept live in which case we do not account it or any
2683 required defs in the SLP children in the scalar cost. This
2684 way we make the vectorization more costly when compared to
2685 the scalar cost. */
2686 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2688 imm_use_iterator use_iter;
2689 gimple *use_stmt;
2690 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2691 if (!is_gimple_debug (use_stmt)
2692 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2693 use_stmt)
2694 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2696 (*life)[i] = true;
2697 BREAK_FROM_IMM_USE_STMT (use_iter);
2700 if ((*life)[i])
2701 continue;
2703 /* Count scalar stmts only once. */
2704 if (gimple_visited_p (stmt))
2705 continue;
2706 gimple_set_visited (stmt, true);
2708 stmt_info = vinfo_for_stmt (stmt);
2709 vect_cost_for_stmt kind;
2710 if (STMT_VINFO_DATA_REF (stmt_info))
2712 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2713 kind = scalar_load;
2714 else
2715 kind = scalar_store;
2717 else
2718 kind = scalar_stmt;
2719 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2722 auto_vec<bool, 20> subtree_life;
2723 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2725 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2727 /* Do not directly pass LIFE to the recursive call, copy it to
2728 confine changes in the callee to the current child/subtree. */
2729 subtree_life.safe_splice (*life);
2730 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2731 subtree_life.truncate (0);
2736 /* Check if vectorization of the basic block is profitable. */
2738 static bool
2739 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2741 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2742 slp_instance instance;
2743 int i;
2744 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2745 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2747 /* Calculate scalar cost. */
2748 stmt_vector_for_cost scalar_costs;
2749 scalar_costs.create (0);
2750 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2752 auto_vec<bool, 20> life;
2753 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2754 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2755 SLP_INSTANCE_TREE (instance),
2756 &life, &scalar_costs);
2758 void *target_cost_data = init_cost (NULL);
2759 add_stmt_costs (target_cost_data, &scalar_costs);
2760 scalar_costs.release ();
2761 unsigned dummy;
2762 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2763 destroy_cost_data (target_cost_data);
2765 /* Unset visited flag. */
2766 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2767 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2768 gimple_set_visited (gsi_stmt (gsi), false);
2770 /* Complete the target-specific cost calculation. */
2771 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2772 &vec_inside_cost, &vec_epilogue_cost);
2774 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2776 if (dump_enabled_p ())
2778 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2779 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2780 vec_inside_cost);
2781 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2782 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2783 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2786 /* Vectorization is profitable if its cost is no more than the cost of the
2787 scalar version. Note that we err on the vector side for equal cost because
2788 the cost estimate is otherwise quite pessimistic (constant uses are
2789 free on the scalar side but cost a load on the vector side for
2790 example). */
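/* E.g. (illustrative numbers): with an inside cost of 6, a prologue cost
   of 2, an epilogue cost of 0 and a scalar cost of 8 the two sides compare
   equal and we still vectorize. */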
2791 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2792 return false;
2794 return true;
2797 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2798 if so and sets fatal to true if failure is independent of
2799 current_vector_size. */
2801 static bb_vec_info
2802 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2803 gimple_stmt_iterator region_end,
2804 vec<data_reference_p> datarefs, int n_stmts,
2805 bool &fatal, vec_info_shared *shared)
2807 bb_vec_info bb_vinfo;
2808 slp_instance instance;
2809 int i;
2810 poly_uint64 min_vf = 2;
2812 /* The first group of checks is independent of the vector size. */
2813 fatal = true;
2815 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2817 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2819 "not vectorized: too many instructions in "
2820 "basic block.\n");
2821 free_data_refs (datarefs);
2822 return NULL;
2825 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2826 if (!bb_vinfo)
2827 return NULL;
2829 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2830 bb_vinfo->shared->save_datarefs ();
2832 /* Analyze the data references. */
2834 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2836 if (dump_enabled_p ())
2837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2838 "not vectorized: unhandled data-ref in basic "
2839 "block.\n");
2841 delete bb_vinfo;
2842 return NULL;
2845 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2847 if (dump_enabled_p ())
2848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2849 "not vectorized: not enough data-refs in "
2850 "basic block.\n");
2852 delete bb_vinfo;
2853 return NULL;
2856 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2858 if (dump_enabled_p ())
2859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 "not vectorized: unhandled data access in "
2861 "basic block.\n");
2863 delete bb_vinfo;
2864 return NULL;
2867 /* If there are no grouped stores in the region there is no need
2868 to continue with pattern recog as vect_analyze_slp will fail
2869 anyway. */
2870 if (bb_vinfo->grouped_stores.is_empty ())
2872 if (dump_enabled_p ())
2873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2874 "not vectorized: no grouped stores in "
2875 "basic block.\n");
2877 delete bb_vinfo;
2878 return NULL;
2881 /* The rest of the analysis below depends on the vector size in some way, so from here on failures are no longer fatal. */
2882 fatal = false;
2884 vect_pattern_recog (bb_vinfo);
2886 /* Check the SLP opportunities in the basic block, analyze and build SLP
2887 trees. */
2888 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2890 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2893 "Failed to SLP the basic block.\n");
2894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2895 "not vectorized: failed to find SLP opportunities "
2896 "in basic block.\n");
2899 delete bb_vinfo;
2900 return NULL;
2903 vect_record_base_alignments (bb_vinfo);
2905 /* Analyze and verify the alignment of data references and the
2906 dependence in the SLP instances. */
2907 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2909 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2910 || ! vect_slp_analyze_instance_dependence (instance))
2912 dump_printf_loc (MSG_NOTE, vect_location,
2913 "removing SLP instance operations starting from: ");
2914 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2915 SLP_TREE_SCALAR_STMTS
2916 (SLP_INSTANCE_TREE (instance))[0], 0);
2917 vect_free_slp_instance (instance);
2918 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2919 continue;
2922 /* Mark all the statements that we want to vectorize as pure SLP and
2923 relevant. */
2924 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2925 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2927 i++;
2929 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2931 delete bb_vinfo;
2932 return NULL;
2935 if (!vect_slp_analyze_operations (bb_vinfo))
2937 if (dump_enabled_p ())
2938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2939 "not vectorized: bad operation in basic block.\n");
2941 delete bb_vinfo;
2942 return NULL;
2945 /* Cost model: check if the vectorization is worthwhile. */
2946 if (!unlimited_cost_model (NULL)
2947 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2949 if (dump_enabled_p ())
2950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2951 "not vectorized: vectorization is not "
2952 "profitable.\n");
2954 delete bb_vinfo;
2955 return NULL;
2958 if (dump_enabled_p ())
2959 dump_printf_loc (MSG_NOTE, vect_location,
2960 "Basic block will be vectorized using SLP\n");
2962 return bb_vinfo;
2966 /* Main entry for the BB vectorizer. Analyze and transform BB; return
2967 true if anything in the basic block was vectorized. */
2969 bool
2970 vect_slp_bb (basic_block bb)
2972 bb_vec_info bb_vinfo;
2973 gimple_stmt_iterator gsi;
2974 bool any_vectorized = false;
2975 auto_vector_sizes vector_sizes;
2977 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2979 /* Autodetect first vector size we try. */
2980 current_vector_size = 0;
2981 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2982 unsigned int next_size = 0;
2984 gsi = gsi_start_bb (bb);
2986 poly_uint64 autodetected_vector_size = 0;
2987 while (1)
2989 if (gsi_end_p (gsi))
2990 break;
2992 gimple_stmt_iterator region_begin = gsi;
2993 vec<data_reference_p> datarefs = vNULL;
2994 int insns = 0;
2996 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2998 gimple *stmt = gsi_stmt (gsi);
2999 if (is_gimple_debug (stmt))
3000 continue;
3001 insns++;
3003 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3004 vect_location = stmt;
3006 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3007 break;
3010 /* Skip leading unhandled stmts. */
3011 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3013 gsi_next (&gsi);
3014 continue;
3017 gimple_stmt_iterator region_end = gsi;
3019 bool vectorized = false;
3020 bool fatal = false;
3021 vec_info_shared shared;
3022 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3023 datarefs, insns, fatal, &shared);
3024 if (bb_vinfo
3025 && dbg_cnt (vect_slp))
3027 if (dump_enabled_p ())
3028 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3030 bb_vinfo->shared->check_datarefs ();
3031 vect_schedule_slp (bb_vinfo);
3033 unsigned HOST_WIDE_INT bytes;
3034 if (current_vector_size.is_constant (&bytes))
3035 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3036 "basic block part vectorized using "
3037 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3038 "vectors\n", bytes);
3039 else
3040 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3041 "basic block part vectorized using variable "
3042 "length vectors\n");
3044 vectorized = true;
3046 delete bb_vinfo;
3048 any_vectorized |= vectorized;
3050 if (next_size == 0)
3051 autodetected_vector_size = current_vector_size;
3053 if (next_size < vector_sizes.length ()
3054 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3055 next_size += 1;
3057 if (vectorized
3058 || next_size == vector_sizes.length ()
3059 || known_eq (current_vector_size, 0U)
3060 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3061 vector sizes will fail do not bother iterating. */
3062 || fatal)
3064 if (gsi_end_p (region_end))
3065 break;
3067 /* Skip the unhandled stmt. */
3068 gsi_next (&gsi);
3070 /* And reset vector sizes. */
3071 current_vector_size = 0;
3072 next_size = 0;
3074 else
3076 /* Try the next biggest vector size. */
3077 current_vector_size = vector_sizes[next_size++];
3078 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_NOTE, vect_location,
3081 "***** Re-trying analysis with "
3082 "vector size ");
3083 dump_dec (MSG_NOTE, current_vector_size);
3084 dump_printf (MSG_NOTE, "\n");
3087 /* Start over. */
3088 gsi = region_begin;
3092 return any_vectorized;
3096 /* Return true if the vector type of the boolean constant that is the
3097 OPNUM operand of statement STMT is a boolean vector. */
3099 static bool
3100 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3102 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3103 enum tree_code code = gimple_expr_code (stmt);
3104 tree op, vectype;
3105 enum vect_def_type dt;
3107 /* For comparisons and COND_EXPR, the type is chosen depending
3108 on the other comparison operand. */
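/* E.g. (illustrative): for 'mask_1 = mask_2 != 0' the boolean constant 0
   gets a boolean vector type iff the vectype chosen for mask_2 is a
   boolean vector. */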
3109 if (TREE_CODE_CLASS (code) == tcc_comparison)
3111 if (opnum)
3112 op = gimple_assign_rhs1 (stmt);
3113 else
3114 op = gimple_assign_rhs2 (stmt);
3116 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3117 gcc_unreachable ();
3119 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3122 if (code == COND_EXPR)
3124 tree cond = gimple_assign_rhs1 (stmt);
3126 if (TREE_CODE (cond) == SSA_NAME)
3127 op = cond;
3128 else if (opnum)
3129 op = TREE_OPERAND (cond, 1);
3130 else
3131 op = TREE_OPERAND (cond, 0);
3133 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3134 gcc_unreachable ();
3136 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3139 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3142 /* Build a variable-length vector in which the elements in ELTS are repeated
3143 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3144 RESULTS and add any new instructions to SEQ.
3146 The approach we use is:
3148 (1) Find a vector mode VM with integer elements of mode IM.
3150 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3151 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3152 from small vectors to IM.
3154 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3156 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3157 correct byte contents.
3159 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3161 We try to find the largest IM for which this sequence works, in order
3162 to cut down on the number of interleaves. */
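/* For example (illustrative values): with NELTS == 4 32-bit elements and
   IM == DImode, step (2) builds two DImode values holding {ELTS[0],ELTS[1]}
   and {ELTS[2],ELTS[3]}, step (3) duplicates each across a vector of mode
   VM, one interleaving VEC_PERM_EXPR in step (4) restores the original
   element order, and step (5) casts the result back to VECTOR_TYPE. */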
3164 void
3165 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3166 unsigned int nresults, vec<tree> &results)
3168 unsigned int nelts = elts.length ();
3169 tree element_type = TREE_TYPE (vector_type);
3171 /* (1) Find a vector mode VM with integer elements of mode IM. */
3172 unsigned int nvectors = 1;
3173 tree new_vector_type;
3174 tree permutes[2];
3175 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3176 &nvectors, &new_vector_type,
3177 permutes))
3178 gcc_unreachable ();
3180 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3181 unsigned int partial_nelts = nelts / nvectors;
3182 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3184 tree_vector_builder partial_elts;
3185 auto_vec<tree, 32> pieces (nvectors * 2);
3186 pieces.quick_grow (nvectors * 2);
3187 for (unsigned int i = 0; i < nvectors; ++i)
3189 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3190 ELTS' has mode IM. */
3191 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3192 for (unsigned int j = 0; j < partial_nelts; ++j)
3193 partial_elts.quick_push (elts[i * partial_nelts + j]);
3194 tree t = gimple_build_vector (seq, &partial_elts);
3195 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3196 TREE_TYPE (new_vector_type), t);
3198 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3199 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3202 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3203 correct byte contents.
3205 We need to repeat the following operation log2(nvectors) times:
3207 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3208 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3210 However, if each input repeats every N elements and the VF is
3211 a multiple of N * 2, the HI result is the same as the LO. */
3212 unsigned int in_start = 0;
3213 unsigned int out_start = nvectors;
3214 unsigned int hi_start = nvectors / 2;
3215 /* A bound on the number of outputs needed to produce NRESULTS results
3216 in the final iteration. */
3217 unsigned int noutputs_bound = nvectors * nresults;
3218 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3220 noutputs_bound /= 2;
3221 unsigned int limit = MIN (noutputs_bound, nvectors);
3222 for (unsigned int i = 0; i < limit; ++i)
3224 if ((i & 1) != 0
3225 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3226 2 * in_repeat))
3228 pieces[out_start + i] = pieces[out_start + i - 1];
3229 continue;
3232 tree output = make_ssa_name (new_vector_type);
3233 tree input1 = pieces[in_start + (i / 2)];
3234 tree input2 = pieces[in_start + (i / 2) + hi_start];
3235 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3236 input1, input2,
3237 permutes[i & 1]);
3238 gimple_seq_add_stmt (seq, stmt);
3239 pieces[out_start + i] = output;
3241 std::swap (in_start, out_start);
3244 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3245 results.reserve (nresults);
3246 for (unsigned int i = 0; i < nresults; ++i)
3247 if (i < nvectors)
3248 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3249 pieces[in_start + i]));
3250 else
3251 results.quick_push (results[i - nvectors]);
3255 /* For constant and loop invariant defs of SLP_NODE this function returns
3256 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3257 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3258 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3262 static void
3263 vect_get_constant_vectors (tree op, slp_tree slp_node,
3264 vec<tree> *vec_oprnds,
3265 unsigned int op_num, unsigned int number_of_vectors)
3267 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3268 gimple *stmt = stmts[0];
3269 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3270 unsigned HOST_WIDE_INT nunits;
3271 tree vec_cst;
3272 unsigned j, number_of_places_left_in_vector;
3273 tree vector_type;
3274 tree vop;
3275 int group_size = stmts.length ();
3276 unsigned int vec_num, i;
3277 unsigned number_of_copies = 1;
3278 vec<tree> voprnds;
3279 voprnds.create (number_of_vectors);
3280 bool constant_p, is_store;
3281 tree neutral_op = NULL;
3282 enum tree_code code = gimple_expr_code (stmt);
3283 gimple_seq ctor_seq = NULL;
3284 auto_vec<tree, 16> permute_results;
3286 /* Check if vector type is a boolean vector. */
3287 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3288 && vect_mask_constant_operand_p (stmt, op_num))
3289 vector_type
3290 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3291 else
3292 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3294 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3296 is_store = true;
3297 op = gimple_assign_rhs1 (stmt);
3299 else
3300 is_store = false;
3302 gcc_assert (op);
3304 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3305 created vectors. It is greater than 1 if unrolling is performed.
3307 For example, we have two scalar operands, s1 and s2 (e.g., group of
3308 strided accesses of size two), while NUNITS is four (i.e., four scalars
3309 of this type can be packed in a vector). The output vector will contain
3310 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3311 will be 2).
3313 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3314 containing the operands.
3316 For example, NUNITS is four as before, and the group size is 8
3317 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3318 {s5, s6, s7, s8}. */
3320 /* When using duplicate_and_interleave, we just need one element for
3321 each scalar statement. */
3322 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3323 nunits = group_size;
3325 number_of_copies = nunits * number_of_vectors / group_size;
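/* E.g. (illustrative): reusing the example above, NUNITS == 4 with
   GROUP_SIZE == 2 and a single vector gives 4 * 1 / 2 == 2 copies of
   {s1, s2}, matching the {s1, s2, s1, s2} layout. */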
3327 number_of_places_left_in_vector = nunits;
3328 constant_p = true;
3329 tree_vector_builder elts (vector_type, nunits, 1);
3330 elts.quick_grow (nunits);
3331 bool place_after_defs = false;
3332 for (j = 0; j < number_of_copies; j++)
3334 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3336 if (is_store)
3337 op = gimple_assign_rhs1 (stmt);
3338 else
3340 switch (code)
3342 case COND_EXPR:
3344 tree cond = gimple_assign_rhs1 (stmt);
3345 if (TREE_CODE (cond) == SSA_NAME)
3346 op = gimple_op (stmt, op_num + 1);
3347 else if (op_num == 0 || op_num == 1)
3348 op = TREE_OPERAND (cond, op_num);
3349 else
3351 if (op_num == 2)
3352 op = gimple_assign_rhs2 (stmt);
3353 else
3354 op = gimple_assign_rhs3 (stmt);
3357 break;
3359 case CALL_EXPR:
3360 op = gimple_call_arg (stmt, op_num);
3361 break;
3363 case LSHIFT_EXPR:
3364 case RSHIFT_EXPR:
3365 case LROTATE_EXPR:
3366 case RROTATE_EXPR:
3367 op = gimple_op (stmt, op_num + 1);
3368 /* Unlike the other binary operators, shifts/rotates take a
3369 shift count of type int rather than one of the same type as
3370 the lhs, so make sure the scalar has the right type if
3371 we are dealing with vectors of
3372 long long/long/short/char. */
3373 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3374 op = fold_convert (TREE_TYPE (vector_type), op);
3375 break;
3377 default:
3378 op = gimple_op (stmt, op_num + 1);
3379 break;
3383 /* Create 'vect_ = {op0,op1,...,opn}'. */
3384 number_of_places_left_in_vector--;
3385 tree orig_op = op;
3386 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3388 if (CONSTANT_CLASS_P (op))
3390 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3392 /* Can't use VIEW_CONVERT_EXPR for booleans because
3393 of possibly different sizes of scalar value and
3394 vector element. */
3395 if (integer_zerop (op))
3396 op = build_int_cst (TREE_TYPE (vector_type), 0);
3397 else if (integer_onep (op))
3398 op = build_all_ones_cst (TREE_TYPE (vector_type));
3399 else
3400 gcc_unreachable ();
3402 else
3403 op = fold_unary (VIEW_CONVERT_EXPR,
3404 TREE_TYPE (vector_type), op);
3405 gcc_assert (op && CONSTANT_CLASS_P (op));
3407 else
3409 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3410 gimple *init_stmt;
3411 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3413 tree true_val
3414 = build_all_ones_cst (TREE_TYPE (vector_type));
3415 tree false_val
3416 = build_zero_cst (TREE_TYPE (vector_type));
3417 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3418 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3419 op, true_val,
3420 false_val);
3422 else
3424 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3425 op);
3426 init_stmt
3427 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3428 op);
3430 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3431 op = new_temp;
3434 elts[number_of_places_left_in_vector] = op;
3435 if (!CONSTANT_CLASS_P (op))
3436 constant_p = false;
3437 if (TREE_CODE (orig_op) == SSA_NAME
3438 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3439 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3440 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3441 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3442 place_after_defs = true;
3444 if (number_of_places_left_in_vector == 0)
3446 if (constant_p
3447 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3448 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3449 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3450 else
3452 if (vec_oprnds->is_empty ())
3453 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3454 number_of_vectors,
3455 permute_results);
3456 vec_cst = permute_results[number_of_vectors - j - 1];
3458 tree init;
3459 gimple_stmt_iterator gsi;
3460 if (place_after_defs)
3462 gsi = gsi_for_stmt
3463 (vect_find_last_scalar_stmt_in_slp (slp_node));
3464 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3466 else
3467 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3468 if (ctor_seq != NULL)
3470 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3471 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3472 ctor_seq = NULL;
3474 voprnds.quick_push (init);
3475 place_after_defs = false;
3476 number_of_places_left_in_vector = nunits;
3477 constant_p = true;
3478 elts.new_vector (vector_type, nunits, 1);
3479 elts.quick_grow (nunits);
3484 /* Since the vectors are created in reverse order, reverse
3485 them here. */
3486 vec_num = voprnds.length ();
3487 for (j = vec_num; j != 0; j--)
3489 vop = voprnds[j - 1];
3490 vec_oprnds->quick_push (vop);
3493 voprnds.release ();
3495 /* If VF is greater than the unrolling factor needed for the SLP
3496 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3497 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3498 to replicate the vectors. */
3499 while (number_of_vectors > vec_oprnds->length ())
3501 tree neutral_vec = NULL;
3503 if (neutral_op)
3505 if (!neutral_vec)
3506 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3508 vec_oprnds->quick_push (neutral_vec);
3510 else
3512 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3513 vec_oprnds->quick_push (vop);
3519 /* Get vectorized definitions from SLP_NODE that contains corresponding
3520 vectorized def-stmts. */
3522 static void
3523 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3525 tree vec_oprnd;
3526 gimple *vec_def_stmt;
3527 unsigned int i;
3529 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3531 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3533 gcc_assert (vec_def_stmt);
3534 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3535 vec_oprnd = gimple_phi_result (vec_def_stmt);
3536 else
3537 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3538 vec_oprnds->quick_push (vec_oprnd);
3543 /* Get vectorized definitions for SLP_NODE.
3544 If the scalar definitions are loop invariants or constants, collect them and
3545 call vect_get_constant_vectors() to create vector stmts.
3546 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3547 must be stored in the corresponding child of SLP_NODE, and we call
3548 vect_get_slp_vect_defs () to retrieve them. */
3550 void
3551 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3552 vec<vec<tree> > *vec_oprnds)
3554 gimple *first_stmt;
3555 int number_of_vects = 0, i;
3556 unsigned int child_index = 0;
3557 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3558 slp_tree child = NULL;
3559 vec<tree> vec_defs;
3560 tree oprnd;
3561 bool vectorized_defs;
3563 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3564 FOR_EACH_VEC_ELT (ops, i, oprnd)
3566 /* For each operand we check if it has vectorized definitions in a child
3567 node or we need to create them (for invariants and constants). We
3568 check if the LHS of the first stmt of the next child matches OPRND.
3569 If it does, we found the correct child. Otherwise, we call
3570 vect_get_constant_vectors (), and do not advance CHILD_INDEX so that
3571 this child node is checked again for the next operand. */
3572 vectorized_defs = false;
3573 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3575 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3577 /* We have to check both pattern and original def, if available. */
3578 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3580 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3581 gimple *related
3582 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3583 tree first_def_op;
3585 if (gimple_code (first_def) == GIMPLE_PHI)
3586 first_def_op = gimple_phi_result (first_def);
3587 else
3588 first_def_op = gimple_get_lhs (first_def);
3589 if (operand_equal_p (oprnd, first_def_op, 0)
3590 || (related
3591 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3593 /* The number of vector defs is determined by the number of
3594 vector statements in the node from which we get those
3595 statements. */
3596 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3597 vectorized_defs = true;
3598 child_index++;
3601 else
3602 child_index++;
3605 if (!vectorized_defs)
3607 if (i == 0)
3609 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3610 /* The number of vector stmts was calculated according to the LHS in
3611 vect_schedule_slp_instance (); fix it by replacing LHS with
3612 RHS if necessary. See vect_get_smallest_scalar_type () for
3613 details. */
3614 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3615 &rhs_size_unit);
3616 if (rhs_size_unit != lhs_size_unit)
3618 number_of_vects *= rhs_size_unit;
3619 number_of_vects /= lhs_size_unit;
3624 /* Allocate memory for vectorized defs. */
3625 vec_defs = vNULL;
3626 vec_defs.create (number_of_vects);
3628 /* For reduction defs we call vect_get_constant_vectors (), since we are
3629 looking for initial loop invariant values. */
3630 if (vectorized_defs)
3631 /* The defs are already vectorized. */
3632 vect_get_slp_vect_defs (child, &vec_defs);
3633 else
3634 /* Build vectors from scalar defs. */
3635 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3636 number_of_vects);
3638 vec_oprnds->quick_push (vec_defs);
3642 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3643 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3644 permute statements for the SLP node NODE of the SLP instance
3645 SLP_NODE_INSTANCE. */
3647 bool
3648 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3649 gimple_stmt_iterator *gsi, poly_uint64 vf,
3650 slp_instance slp_node_instance, bool analyze_only,
3651 unsigned *n_perms)
3653 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3655 tree mask_element_type = NULL_TREE, mask_type;
3656 int vec_index = 0;
3657 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3658 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3659 unsigned int mask_element;
3660 machine_mode mode;
3661 unsigned HOST_WIDE_INT nunits, const_vf;
3663 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3664 return false;
3666 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
3668 mode = TYPE_MODE (vectype);
3670 /* At the moment, all permutations are represented using per-element
3671 indices, so we can't cope with variable vector lengths or
3672 vectorization factors. */
3673 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3674 || !vf.is_constant (&const_vf))
3675 return false;
3677 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3678 same size as the vector element being permuted. */
3679 mask_element_type = lang_hooks.types.type_for_mode
3680 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3681 mask_type = get_vectype_for_scalar_type (mask_element_type);
3682 vec_perm_builder mask (nunits, nunits, 1);
3683 mask.quick_grow (nunits);
3684 vec_perm_indices indices;
3686 /* Initialize the vect stmts of NODE to properly insert the generated
3687 stmts later. */
3688 if (! analyze_only)
3689 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3690 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3691 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3693 /* Generate permutation masks for every NODE. Number of masks for each NODE
3694 is equal to GROUP_SIZE.
3695 E.g., we have a group of three nodes with three loads from the same
3696 location in each node, and the vector size is 4. I.e., we have a
3697 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3698 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3699 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3702 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3703 The last mask is illegal since we assume two operands for permute
3704 operation, and the mask element values can't be outside that range.
3705 Hence, the last mask must be converted into {2,5,5,5}.
3706 For the first two permutations we need the first and the second input
3707 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3708 we need the second and the third vectors: {b1,c1,a2,b2} and
3709 {c2,a3,b3,c3}. */
3711 int vect_stmts_counter = 0;
3712 unsigned int index = 0;
3713 int first_vec_index = -1;
3714 int second_vec_index = -1;
3715 bool noop_p = true;
3716 *n_perms = 0;
3718 for (unsigned int j = 0; j < const_vf; j++)
3720 for (int k = 0; k < group_size; k++)
3722 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3723 + j * DR_GROUP_SIZE (stmt_info));
3724 vec_index = i / nunits;
3725 mask_element = i % nunits;
3726 if (vec_index == first_vec_index
3727 || first_vec_index == -1)
3728 {
3729 first_vec_index = vec_index;
3730 }
3731 else if (vec_index == second_vec_index
3732 || second_vec_index == -1)
3733 {
3734 second_vec_index = vec_index;
3735 mask_element += nunits;
3736 }
3737 else
3738 {
3739 if (dump_enabled_p ())
3740 {
3741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3742 "permutation requires at "
3743 "least three vectors ");
3744 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3745 stmt, 0);
3746 }
3747 gcc_assert (analyze_only);
3748 return false;
3749 }
3751 gcc_assert (mask_element < 2 * nunits);
3752 if (mask_element != index)
3753 noop_p = false;
3754 mask[index++] = mask_element;
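/* NOOP_P stays true as long as the mask built so far is the identity
   permutation {0, 1, ..., INDEX-1}; identity masks generate no
   VEC_PERM_EXPR and are not counted in *N_PERMS.  */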
3756 if (index == nunits && !noop_p)
3757 {
3758 indices.new_vector (mask, 2, nunits);
3759 if (!can_vec_perm_const_p (mode, indices))
3760 {
3761 if (dump_enabled_p ())
3762 {
3763 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3764 vect_location,
3765 "unsupported vect permute { ");
3766 for (i = 0; i < nunits; ++i)
3767 {
3768 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3769 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3770 }
3771 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3772 }
3773 gcc_assert (analyze_only);
3774 return false;
3775 }
3777 ++*n_perms;
3778 }
3780 if (index == nunits)
3781 {
3782 if (!analyze_only)
3783 {
3784 tree mask_vec = NULL_TREE;
3786 if (! noop_p)
3787 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
3789 if (second_vec_index == -1)
3790 second_vec_index = first_vec_index;
3792 /* Generate the permute statement if necessary. */
3793 tree first_vec = dr_chain[first_vec_index];
3794 tree second_vec = dr_chain[second_vec_index];
3795 gimple *perm_stmt;
3796 if (! noop_p)
3797 {
3798 tree perm_dest
3799 = vect_create_destination_var (gimple_assign_lhs (stmt),
3800 vectype);
3801 perm_dest = make_ssa_name (perm_dest);
3802 perm_stmt = gimple_build_assign (perm_dest,
3803 VEC_PERM_EXPR,
3804 first_vec, second_vec,
3805 mask_vec);
3806 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3807 }
3808 else
3809 /* If mask was NULL_TREE, generate the requested
3810 identity transform. */
3811 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
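/* The entries of DR_CHAIN are the SSA names of the already generated
   load vectors, so for a no-op permutation the defining statement of
   FIRST_VEC can stand in directly for the permuted value.  */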
3813 /* Store the vector statement in NODE. */
3814 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3815 }
3817 index = 0;
3818 first_vec_index = -1;
3819 second_vec_index = -1;
3820 noop_p = true;
3821 }
3822 }
3823 }
3825 return true;
3826 }
3828 /* Vectorize SLP instance tree in postorder. */
3830 static bool
3831 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3832 scalar_stmts_to_slp_tree_map_t *bst_map)
3833 {
3834 gimple *stmt;
3835 bool grouped_store, is_store;
3836 gimple_stmt_iterator si;
3837 stmt_vec_info stmt_info;
3838 unsigned int group_size;
3839 tree vectype;
3840 int i, j;
3841 slp_tree child;
3843 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3844 return false;
3846 /* See if we have already vectorized the same set of stmts and reuse their
3847 vectorized stmts. */
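/* The map is keyed on the vector of scalar stmts, so a node built from
   exactly the same scalar stmts in the same order, whether in this or in
   another SLP instance, reuses the leader's vectorized stmts via the
   splice below.  */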
3848 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3849 {
3850 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3851 return false;
3852 }
3854 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3855 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3856 vect_schedule_slp_instance (child, instance, bst_map);
3858 /* Push SLP node def-type to stmts. */
3859 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3860 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3861 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3862 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3864 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3865 stmt_info = vinfo_for_stmt (stmt);
3867 /* VECTYPE is the type of the destination. */
3868 vectype = STMT_VINFO_VECTYPE (stmt_info);
3869 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3870 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3872 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3873 if (!SLP_TREE_VEC_STMTS (node).exists ())
3874 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3876 if (dump_enabled_p ())
3877 {
3878 dump_printf_loc (MSG_NOTE, vect_location,
3879 "------>vectorizing SLP node starting from: ");
3880 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3881 }
3883 /* Vectorized stmts go before the last scalar stmt, which is where
3884 all uses are ready. */
3885 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3887 /* Mark the first element of the reduction chain as reduction to properly
3888 transform the node. In the analysis phase only the last element of the
3889 chain is marked as reduction. */
3890 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3891 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3892 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3893 {
3894 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3895 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3896 }
3898 /* Handle two-operation SLP nodes by vectorizing the group with
3899 both operations and then performing a merge. */
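/* An illustrative sketch (not part of the upstream comment), assuming a
   four-lane group whose scalar stmts alternate PLUS_EXPR and MINUS_EXPR,
   i.e. {a0+b0, a1-b1, a2+b2, a3-b3}:
     v0 = node vectorized with code0 (all PLUS):  {a0+b0, a1+b1, a2+b2, a3+b3}
     v1 = node vectorized with ocode (all MINUS): {a0-b0, a1-b1, a2-b2, a3-b3}
     lhs = VEC_PERM_EXPR <v0, v1, {0, 5, 2, 7}>;
   which selects the even lanes of V0 and the odd lanes of V1, the pattern
   that addsub-capable targets can match.  */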
3900 if (SLP_TREE_TWO_OPERATORS (node))
3901 {
3902 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3903 enum tree_code ocode = ERROR_MARK;
3904 gimple *ostmt;
3905 vec_perm_builder mask (group_size, group_size, 1);
3906 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3907 if (gimple_assign_rhs_code (ostmt) != code0)
3908 {
3909 mask.quick_push (1);
3910 ocode = gimple_assign_rhs_code (ostmt);
3911 }
3912 else
3913 mask.quick_push (0);
3914 if (ocode != ERROR_MARK)
3915 {
3916 vec<gimple *> v0;
3917 vec<gimple *> v1;
3918 unsigned j;
3919 tree tmask = NULL_TREE;
3920 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3921 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3922 SLP_TREE_VEC_STMTS (node).truncate (0);
3923 gimple_assign_set_rhs_code (stmt, ocode);
3924 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3925 gimple_assign_set_rhs_code (stmt, code0);
3926 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3927 SLP_TREE_VEC_STMTS (node).truncate (0);
3928 tree meltype = build_nonstandard_integer_type
3929 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3930 tree mvectype = get_same_sized_vectype (meltype, vectype);
3931 unsigned k = 0, l;
3932 for (j = 0; j < v0.length (); ++j)
3933 {
3934 /* Enforced by vect_build_slp_tree, which rejects variable-length
3935 vectors for SLP_TREE_TWO_OPERATORS. */
3936 unsigned int const_nunits = nunits.to_constant ();
3937 tree_vector_builder melts (mvectype, const_nunits, 1);
3938 for (l = 0; l < const_nunits; ++l)
3939 {
3940 if (k >= group_size)
3941 k = 0;
3942 tree t = build_int_cst (meltype,
3943 mask[k++] * const_nunits + l);
3944 melts.quick_push (t);
3945 }
3946 tmask = melts.build ();
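/* Continuing the illustrative PLUS/MINUS example: with GROUP_SIZE 4,
   CONST_NUNITS 4 and MASK = {0,1,0,1}, MELTS comes out as {0, 5, 2, 7};
   lane L is taken from V0 when MASK[K] is 0 (index L) and from V1 when it
   is 1 (index CONST_NUNITS + L).  K wraps around at GROUP_SIZE, so the
   same pattern repeats for every generated vector.  */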
3948 /* ??? Not all targets support a VEC_PERM_EXPR with a
3949 constant mask that would translate to a vec_merge RTX
3950 (with their vec_perm_const_ok). We can either not
3951 vectorize in that case or let veclower do its job.
3952 Unfortunately that isn't too great and at least for
3953 plus/minus we'd eventually like to match targets
3954 vector addsub instructions. */
3955 gimple *vstmt;
3956 vstmt = gimple_build_assign (make_ssa_name (vectype),
3957 VEC_PERM_EXPR,
3958 gimple_assign_lhs (v0[j]),
3959 gimple_assign_lhs (v1[j]), tmask);
3960 vect_finish_stmt_generation (stmt, vstmt, &si);
3961 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3962 }
3963 v0.release ();
3964 v1.release ();
3965 return false;
3966 }
3967 }
3968 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3970 /* Restore stmt def-types. */
3971 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3972 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3973 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3974 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3976 return is_store;
3977 }
3979 /* Replace each scalar call from SLP node NODE with an assignment setting
3980 its lhs to zero. For loop vectorization this is done in vectorizable_call,
3981 but for SLP it needs to be deferred until the end of vect_schedule_slp,
3982 because multiple SLP instances may refer to the same scalar stmt. */
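/* For example (an illustrative sketch, not from the upstream sources):
   a scalar call such as
     x_1 = powf (a_2, 2.0e+0);
   whose uses have all been vectorized is rewritten below to
     x_1 = 0.0;
   which keeps the SSA definition of x_1 valid while making the statement
   trivially dead for later cleanup.  */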
3984 static void
3985 vect_remove_slp_scalar_calls (slp_tree node)
3986 {
3987 gimple *stmt, *new_stmt;
3988 gimple_stmt_iterator gsi;
3989 int i;
3990 slp_tree child;
3991 tree lhs;
3992 stmt_vec_info stmt_info;
3994 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3995 return;
3997 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3998 vect_remove_slp_scalar_calls (child);
4000 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
4001 {
4002 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
4003 continue;
4004 stmt_info = vinfo_for_stmt (stmt);
4005 if (stmt_info == NULL
4006 || is_pattern_stmt_p (stmt_info)
4007 || !PURE_SLP_STMT (stmt_info))
4008 continue;
4009 lhs = gimple_call_lhs (stmt);
4010 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4011 set_vinfo_for_stmt (new_stmt, stmt_info);
4012 set_vinfo_for_stmt (stmt, NULL);
4013 STMT_VINFO_STMT (stmt_info) = new_stmt;
4014 gsi = gsi_for_stmt (stmt);
4015 gsi_replace (&gsi, new_stmt, false);
4016 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4017 }
4018 }
4020 /* Generate vector code for all SLP instances in the loop/basic block. */
4022 bool
4023 vect_schedule_slp (vec_info *vinfo)
4024 {
4025 vec<slp_instance> slp_instances;
4026 slp_instance instance;
4027 unsigned int i;
4028 bool is_store = false;
4031 scalar_stmts_to_slp_tree_map_t *bst_map
4032 = new scalar_stmts_to_slp_tree_map_t ();
4033 slp_instances = vinfo->slp_instances;
4034 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4035 {
4036 /* Schedule the tree of INSTANCE. */
4037 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
4038 instance, bst_map);
4039 if (dump_enabled_p ())
4040 dump_printf_loc (MSG_NOTE, vect_location,
4041 "vectorizing stmts using SLP.\n");
4043 delete bst_map;
4045 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4046 {
4047 slp_tree root = SLP_INSTANCE_TREE (instance);
4048 gimple *store;
4049 unsigned int j;
4050 gimple_stmt_iterator gsi;
4052 /* Remove scalar call stmts. Do not do this for basic-block
4053 vectorization as not all uses may be vectorized.
4054 ??? Why should this be necessary? DCE should be able to
4055 remove the stmts itself.
4056 ??? For BB vectorization we can as well remove scalar
4057 stmts starting from the SLP tree root if they have no
4058 uses. */
4059 if (is_a <loop_vec_info> (vinfo))
4060 vect_remove_slp_scalar_calls (root);
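/* The scalar stores are dead as well: each group of up to GROUP_SIZE
   stores has been replaced by vector stores, so the loop below walks the
   root's scalar stmts and removes them, stopping at the first stmt that
   has no data reference.  */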
4062 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
4063 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4065 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
4066 break;
4068 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
4069 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
4070 /* Free the attached stmt_vec_info and remove the stmt. */
4071 gsi = gsi_for_stmt (store);
4072 unlink_stmt_vdef (store);
4073 gsi_remove (&gsi, true);
4074 release_defs (store);
4075 free_stmt_vec_info (store);
4076 }
4077 }
4079 return is_store;
4080 }