/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
          STMT_VINFO_NUM_SLP_USES (stmt_info)--;
        }
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
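/* For example, given an interleaving chain for the accesses a[0], a[2]
   and a[3] (DR_GROUP_GAP recording the distance from the previous element,
   so 2 for a[2] and 1 for a[3]), the places returned would be 0, 2 and 3
   respectively.  */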
int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
                                      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
        return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
        result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
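/* As a worked example (assuming 16-byte vectors and a target that supports
   the required permutes): for COUNT == 2 and ELT_MODE == SImode the first
   iteration of the loop below already succeeds with NVECTORS == 1, since
   the two SImode elements can be glued into one DImode integer and V2DImode
   is a supported vector mode; the two permutes then interleave the low and
   the high halves of two such vectors.  */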
bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
        {
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
            {
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   indicates that a swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is a cond and the operands of the comparison need to be
   swapped; *SWAP is 2 if STMT is a cond and the code of the comparison needs
   to be inverted.  If any operands were swapped in this function, *SWAP is
   set to a non-zero value on return.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the father node of this one, return 1; if everything
   is ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<stmt_vec_info> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          commutative_op = first_commutative_argument (ifn);

          /* Masked load, only look at mask.  */
          if (ifn == IFN_MASK_LOAD)
            {
              number_of_oprnds = 1;
              /* Mask operand index.  */
              first_op_idx = 5;
            }
        }
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (code == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
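          /* maps[0] is the identity.  maps[1] (*swap == 1) exchanges the two
             operands of the comparison; maps[2] (*swap == 2) exchanges the
             then and else arms, with the comparison code itself being
             inverted when the swap is committed at the end of this
             function.  */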
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
                                             first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt_info->stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
        oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: can't analyze def for %T\n",
                             oprnd);

          return -1;
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == commutative_op && !swapped)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !current_vector_size.is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (stmts.length (),
                                                      TYPE_MODE (type))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: invalid type of def "
                                 "for variable-length SLP %T\n", oprnd);
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt_info);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: illegal type of def %T\n",
                             oprnd);

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: cannot swap operands of "
                             "shared stmt %G", stmt_info->stmt);
          return -1;
        }

      if (first_op_cond)
        {
          gassign *stmt = as_a <gassign *> (stmt_info->stmt);
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        {
          unsigned int op = commutative_op + first_op_idx;
          swap_ssa_operands (stmt_info->stmt,
                             gimple_op_ptr (stmt_info->stmt, op),
                             gimple_op_ptr (stmt_info->stmt, op + 1));
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "swapped operands to match def types in %G",
                         stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */
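/* For example, in basic-block SLP a group of two ints cannot use a
   four-element vector type: const_nunits (4) would exceed the group
   size (2), which would require unrolling the basic block.  */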
static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unsupported data-type in %G\n",
                         stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required "
                         "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
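/* For example, for an add/sub group { a + b, a - b } and a four-element
   VECTYPE, the selector built below is { 0, 5, 2, 7 }: even elements are
   taken from the vector of PLUS results and odd elements from the vector
   of MINUS results, which most targets can do with a single blend.  */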
static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
                               unsigned int group_size, tree vectype,
                               tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates that the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of the comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of the comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
   swapped to B1 <= A1 ? X1 : Y1, or be inverted to A1 < B1 ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (unsigned char *swap,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype)
          || (nunits_vectype
              && !vect_record_max_nunits (stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;

          if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
            load_p = true;
          else if ((gimple_call_internal_p (call_stmt)
                    && (!vectorizable_internal_fn_p
                        (gimple_call_internal_fn (call_stmt))))
                   || gimple_call_tail_p (call_stmt)
                   || gimple_call_noreturn_p (call_stmt)
                   || !gimple_call_nothrow_p (call_stmt)
                   || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        {
          rhs_code = gimple_assign_rhs_code (stmt);
          load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
        }

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              if (vectype == boolean_type_node)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: shift of a"
                                     " boolean.\n");
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt %G", stmt);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt %G", first_stmt_info->stmt);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different shift "
                                 "arguments in %G", stmt);
              /* Mismatch.  */
              continue;
            }

          if (!load_p && rhs_code == CALL_EXPR)
            {
              if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
                                       as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different calls in %G",
                                     stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: different "
                                         "interleaving chains in one node %G",
                                         stmt);
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (load_p)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: not grouped load %G", stmt);

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != VIEW_CONVERT_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different"
                                     " operation %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt %G", stmts[i]->stmt);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt %G", first_stmt_info->stmt);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
                         *leader ? "" : "failed ", *leader);
      if (*leader)
        {
          (*leader)->refcnt++;
          vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
        }
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
                                        &this_max_nunits,
                                        matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}
/* Recursively build an SLP tree starting from NODE.  Fail (and return
   NULL) if def-stmts are not isomorphic, require data permutation or
   are of unsupported types of operation.  Otherwise, return the built
   SLP tree; *MATCHES records which stmts matched the first one.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (stmt_info != other_info)
              return NULL;
        }
      else if (def_type == vect_reduction_def
               || def_type == vect_double_reduction_def
               || def_type == vect_nested_cycle)
        {
          /* Else def types have to match.  */
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (!STMT_VINFO_DATA_REF (other_info)
                  && REDUC_GROUP_FIRST_ELEMENT (other_info)
                  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
                continue;
              if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
                return NULL;
            }
        }
      else
        return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
        {
          /* Masked load.  */
          gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
          nops = 1;
        }
      else
        {
          *max_nunits = this_max_nunits;
          (*tree_size)++;
          node = vect_create_new_slp_node (stmts);
          return node;
        }
    }
  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        matches, npermutes,
                                        &this_tree_size, bst_map)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build this node up from scalars instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild, false);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Building parent vector operands from "
                                     "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  ++this_tree_size;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }
      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Building vector operands from scalars\n");
          this_tree_size++;
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt_info->stmt)
          /* Swapping operands for reductions breaks assumptions later on.  */
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
                  if (!stmt
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                  /* Verify if we can safely swap or if we committed to a
                     specific operand order already.
                     ??? Instead of modifying GIMPLE stmts here we could
                     record whether we want to swap operands in the SLP
                     node and temporarily do that when processing it
                     (or wrap operand accessors in a helper).  */
                  else if (swap[j] != 0
                           || STMT_VINFO_NUM_SLP_USES (stmt_info))
                    {
                      if (!swap_not_matching)
                        {
                          if (dump_enabled_p ())
                            dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                             vect_location,
                                             "Build SLP failed: cannot swap "
                                             "operands of shared stmt %G",
                                             stmts[j]->stmt);
                          goto fail;
                        }
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);
          /* Swap mismatched definition stmts.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                if (dump_enabled_p ())
                  dump_printf (MSG_NOTE, "%d ", j);
              }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            tem, npermutes,
                                            &this_tree_size, bst_map)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
              for (j = 0; j < group_size; ++j)
                if (matches[j] == !swap_not_matching)
                  {
                    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
                    /* Avoid swapping operands twice.  */
                    if (gimple_plf (stmt, GF_PLF_1))
                      continue;
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                    gimple_set_plf (stmt, GF_PLF_1, true);
                  }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
                              == (matches[j] == !swap_not_matching));

              /* If we have all children of child built up from scalars then
                 just throw that away and build this node up from scalars
                 instead.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild, false);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_NOTE, vect_location,
                                         "Building parent vector operands from "
                                         "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      ++this_tree_size;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "", node,
                   estimated_poly_value (node->max_nunits));
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child, visited);
}

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_print_slp_tree (dump_kind, loc, node, visited);
}
/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation,
                          hash_set<slp_tree> &visited)
{
  stmt_vec_info stmt_info;
  vec<stmt_vec_info> tmp_stmts;
  unsigned int i;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation, visited);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    tmp_stmts[permutation[i]] = stmt_info;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
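/* For example, if every load node carries the load permutation
   { 2, 3, 0, 1 }, the scalar stmts of all nodes are reordered so that
   stmt I lands in slot PERMUTATION[I]; afterwards the loads appear in
   memory order and their load permutations can typically be released.  */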
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  hash_set<slp_tree> visited;
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation, visited);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == DR_GROUP_SIZE (first_stmt_info)
              && DR_GROUP_GAP (first_stmt_info) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node,
                       hash_set<slp_tree> &visited)
{
  if (visited.add (node))
    return;

  if (SLP_TREE_CHILDREN (node).length () == 0)
    {
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
          && STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
        SLP_INSTANCE_LOADS (inst).safe_push (node);
    }
  else
    {
      unsigned i;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
        vect_gather_slp_loads (inst, child, visited);
    }
}

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (inst, node, visited);
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (stmt_info)
      && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (stmt_info))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          stmt_vec_info next_load_info = NULL;
          stmt_vec_info load_info;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              if (j != 0
                  && (next_load_info != load_info
                      || DR_GROUP_GAP (load_info) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
              group_info = DR_GROUP_FIRST_ELEMENT (group_info);
              unsigned HOST_WIDE_INT nunits;
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of DR_GROUP_SIZE.  */
              tree vectype = STMT_VINFO_VECTYPE (group_info);
              if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
                  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "BB vectorization with gaps at the end of "
                                     "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                     vect_location,
                                     "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                             (STMT_VINFO_LOOP_VINFO (stmt_info)));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last scalar stmt in the SLP group rooted at NODE (for a group
   of stores, the last store).  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */
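/* For example, splitting a group of six stores with GROUP1_SIZE == 4 (and
   no gap before the original group) yields groups of sizes 4 and 2 whose
   DR_GROUP_GAPs become 2 and 4 respectively: when iterating, each group
   has to skip over the elements owned by the other one.  */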
1860 static stmt_vec_info
1861 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1863 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1864 gcc_assert (group1_size > 0);
1865 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1866 gcc_assert (group2_size > 0);
1867 DR_GROUP_SIZE (first_vinfo) = group1_size;
1869 stmt_vec_info stmt_info = first_vinfo;
1870 for (unsigned i = group1_size; i > 1; i--)
1872 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1873 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1875 /* STMT is now the last element of the first group. */
1876 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1877 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1879 DR_GROUP_SIZE (group2) = group2_size;
1880 for (stmt_info = group2; stmt_info;
1881 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1883 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1884 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1887 /* For the second group, the DR_GROUP_GAP is the gap before the original
1888 group plus the GROUP1_SIZE elements now in the first group. */
1889 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1891 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1892 DR_GROUP_GAP (first_vinfo) += group2_size;
1894 if (dump_enabled_p ())
1895 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1896 group1_size, group2_size);
1898 return group2;
1901 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1902 statements and a vector of NUNITS elements. */
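/* For example, six stmts with four-element vectors give
   common_multiple (4, 6) == 12 and an unrolling factor of 12 / 6 == 2:
   two copies of the group fill three vectors exactly.  */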
1904 static poly_uint64
1905 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1907 return exact_div (common_multiple (nunits, group_size), group_size);
1910 /* Analyze an SLP instance starting from a group of grouped stores. Call
1911 vect_build_slp_tree to build a tree of packed stmts if possible.
1912 Return FALSE if it's impossible to SLP any stmt in the loop. */
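/* An instance can be rooted at one of three kinds of groups, matching the
   three cases distinguished at the start of the function below: a grouped
   store (DR_GROUP_*), a reduction chain (REDUC_GROUP_*), or, for loop
   vectorization only, the group of all loop reductions.  */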
1914 static bool
1915 vect_analyze_slp_instance (vec_info *vinfo,
1916 stmt_vec_info stmt_info, unsigned max_tree_size)
1918 slp_instance new_instance;
1919 slp_tree node;
1920 unsigned int group_size;
1921 tree vectype, scalar_type = NULL_TREE;
1922 unsigned int i;
1923 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1924 vec<stmt_vec_info> scalar_stmts;
1926 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1928 scalar_type = TREE_TYPE (DR_REF (dr));
1929 vectype = get_vectype_for_scalar_type (scalar_type);
1930 group_size = DR_GROUP_SIZE (stmt_info);
1932 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1934 gcc_assert (is_a <loop_vec_info> (vinfo));
1935 vectype = STMT_VINFO_VECTYPE (stmt_info);
1936 group_size = REDUC_GROUP_SIZE (stmt_info);
1938 else
1940 gcc_assert (is_a <loop_vec_info> (vinfo));
1941 vectype = STMT_VINFO_VECTYPE (stmt_info);
1942 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1945 if (!vectype)
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1949 "Build SLP failed: unsupported data-type %T\n",
1950 scalar_type);
1952 return false;
1954 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1956 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1957 scalar_stmts.create (group_size);
1958 stmt_vec_info next_info = stmt_info;
1959 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1961 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1962 while (next_info)
1964 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1965 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
1968 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1970 /* Collect the reduction stmts and store them in
1971 SLP_TREE_SCALAR_STMTS. */
1972 while (next_info)
1974 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1975 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
1977 /* Mark the first element of the reduction chain as reduction to properly
1978 transform the node. In the reduction analysis phase only the last
1979 element of the chain is marked as reduction. */
1980 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
1981 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
1982 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
1984 else
1986 /* Collect reduction statements. */
1987 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1988 for (i = 0; reductions.iterate (i, &next_info); i++)
1989 scalar_stmts.safe_push (next_info);
1992 /* Build the tree for the SLP instance. */
1993 bool *matches = XALLOCAVEC (bool, group_size);
1994 unsigned npermutes = 0;
1995 scalar_stmts_to_slp_tree_map_t *bst_map
1996 = new scalar_stmts_to_slp_tree_map_t ();
1997 poly_uint64 max_nunits = nunits;
1998 unsigned tree_size = 0;
1999 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2000 &max_nunits, matches, &npermutes,
2001 &tree_size, bst_map);
2002 /* The map keeps a reference on SLP nodes built, release that. */
2003 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2004 it != bst_map->end (); ++it)
2005 if ((*it).second)
2006 vect_free_slp_tree ((*it).second, false);
2007 delete bst_map;
2008 if (node != NULL)
2010 /* Calculate the unrolling factor based on the smallest type. */
2011 poly_uint64 unrolling_factor
2012 = calculate_unrolling_factor (max_nunits, group_size);
2014 if (maybe_ne (unrolling_factor, 1U)
2015 && is_a <bb_vec_info> (vinfo))
2017 unsigned HOST_WIDE_INT const_max_nunits;
2018 if (!max_nunits.is_constant (&const_max_nunits)
2019 || const_max_nunits > group_size)
2021 if (dump_enabled_p ())
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2023 "Build SLP failed: store group "
2024 "size not a multiple of the vector size "
2025 "in basic block SLP\n");
2026 vect_free_slp_tree (node, false);
2027 return false;
2029 /* Fatal mismatch. */
2030 matches[group_size / const_max_nunits * const_max_nunits] = false;
2031 vect_free_slp_tree (node, false);
2033 else
2035 /* Create a new SLP instance. */
2036 new_instance = XNEW (class _slp_instance);
2037 SLP_INSTANCE_TREE (new_instance) = node;
2038 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2039 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2040 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2041 vect_gather_slp_loads (new_instance, node);
2042 if (dump_enabled_p ())
2043 dump_printf_loc (MSG_NOTE, vect_location,
2044 "SLP size %u vs. limit %u.\n",
2045 tree_size, max_tree_size);
2047 /* Compute the load permutation. */
2048 slp_tree load_node;
2049 bool loads_permuted = false;
2050 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2052 vec<unsigned> load_permutation;
2053 int j;
2054 stmt_vec_info load_info;
2055 bool this_load_permuted = false;
2056 load_permutation.create (group_size);
2057 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2058 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2059 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2061 int load_place = vect_get_place_in_interleaving_chain
2062 (load_info, first_stmt_info);
2063 gcc_assert (load_place != -1);
2064 if (load_place != j)
2065 this_load_permuted = true;
2066 load_permutation.safe_push (load_place);
2068 if (!this_load_permuted
2069 /* The load requires permutation when unrolling exposes
2070 a gap either because the group is larger than the SLP
2071 group-size or because there is a gap between the groups. */
2072 && (known_eq (unrolling_factor, 1U)
2073 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2074 && DR_GROUP_GAP (first_stmt_info) == 0)))
2076 load_permutation.release ();
2077 continue;
2079 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2080 loads_permuted = true;
2083 if (loads_permuted)
2085 if (!vect_supported_load_permutation_p (new_instance))
2087 if (dump_enabled_p ())
2088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2089 "Build SLP failed: unsupported load "
2090 "permutation %G", stmt_info->stmt);
2091 vect_free_slp_instance (new_instance, false);
2092 return false;
2096 /* If the loads and stores can be handled with load/store-lane
2097 instructions, do not generate this SLP instance. */
2098 if (is_a <loop_vec_info> (vinfo)
2099 && loads_permuted
2100 && dr && vect_store_lanes_supported (vectype, group_size, false))
2102 slp_tree load_node;
2103 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2105 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2106 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2107 /* Use SLP for strided accesses (or if we can't load-lanes). */
2108 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2109 || ! vect_load_lanes_supported
2110 (STMT_VINFO_VECTYPE (stmt_vinfo),
2111 DR_GROUP_SIZE (stmt_vinfo), false))
2112 break;
2114 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2116 if (dump_enabled_p ())
2117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2118 "Built SLP cancelled: can use "
2119 "load/store-lanes\n");
2120 vect_free_slp_instance (new_instance, false);
2121 return false;
2125 vinfo->slp_instances.safe_push (new_instance);
2127 if (dump_enabled_p ())
2129 dump_printf_loc (MSG_NOTE, vect_location,
2130 "Final SLP tree for instance:\n");
2131 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2134 return true;
2137 else
2139 /* Failed to SLP. */
2140 /* Free the allocated memory. */
2141 scalar_stmts.release ();
2144 /* For basic block SLP, try to break the group up into multiples of the
2145 vector size. */
2146 unsigned HOST_WIDE_INT const_nunits;
2147 if (is_a <bb_vec_info> (vinfo)
2148 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2149 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2150 && nunits.is_constant (&const_nunits))
2152 /* We consider breaking the group only on VF boundaries from the existing
2153 start. */
2154 for (i = 0; i < group_size; i++)
2155 if (!matches[i]) break;
2157 if (i >= const_nunits && i < group_size)
2159 /* Split into two groups at the first vector boundary before i. */
2160 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2161 unsigned group1_size = i & ~(const_nunits - 1);
2163 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2164 group1_size);
2165 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2166 max_tree_size);
2167 /* If the first non-match was in the middle of a vector,
2168 skip the rest of that vector. */
2169 if (group1_size < i)
2171 i = group1_size + const_nunits;
2172 if (i < group_size)
2173 rest = vect_split_slp_store_group (rest, const_nunits);
2175 if (i < group_size)
2176 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2177 return res;
2179 /* Even though the first vector did not all match, we might be able to SLP
2180 (some) of the remainder. FORNOW ignore this possibility. */
2183 return false;
2187 /* Check if there are stmts in the loop that can be vectorized using SLP.
2188 Build SLP trees of packed scalar stmts if SLP is possible. */
2190 opt_result
2191 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2193 unsigned int i;
2194 stmt_vec_info first_element;
2196 DUMP_VECT_SCOPE ("vect_analyze_slp");
2198 /* Find SLP sequences starting from groups of grouped stores. */
2199 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2200 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2202 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2204 if (loop_vinfo->reduction_chains.length () > 0)
2206 /* Find SLP sequences starting from reduction chains. */
2207 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2208 if (! vect_analyze_slp_instance (vinfo, first_element,
2209 max_tree_size))
2211 /* Dissolve reduction chain group. */
2212 stmt_vec_info vinfo = first_element;
2213 while (vinfo)
2215 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2216 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2217 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2218 vinfo = next;
2220 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2224 /* Find SLP sequences starting from groups of reductions. */
2225 if (loop_vinfo->reductions.length () > 1)
2226 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2227 max_tree_size);
2230 return opt_result::success ();
2234 /* For each possible SLP instance decide whether to SLP it and calculate overall
2235 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2236 least one instance. */
2238 bool
2239 vect_make_slp_decision (loop_vec_info loop_vinfo)
2241 unsigned int i;
2242 poly_uint64 unrolling_factor = 1;
2243 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2244 slp_instance instance;
2245 int decided_to_slp = 0;
2247 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2249 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2251 /* FORNOW: SLP if you can. */
2252 /* All unroll factors have the form current_vector_size * X for some
2253 rational X, so they must have a common multiple. */
2254 unrolling_factor
2255 = force_common_multiple (unrolling_factor,
2256 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2258 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2259 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2260 loop-based vectorization. Such stmts will be marked as HYBRID. */
2261 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2262 decided_to_slp++;
2265 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2267 if (decided_to_slp && dump_enabled_p ())
2269 dump_printf_loc (MSG_NOTE, vect_location,
2270 "Decided to SLP %d instances. Unrolling factor ",
2271 decided_to_slp);
2272 dump_dec (MSG_NOTE, unrolling_factor);
2273 dump_printf (MSG_NOTE, "\n");
2276 return (decided_to_slp > 0);
2280 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2281 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2283 static void
2284 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2285 hash_map<slp_tree, unsigned> &visited)
2287 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2288 imm_use_iterator imm_iter;
2289 gimple *use_stmt;
2290 stmt_vec_info use_vinfo;
2291 slp_tree child;
2292 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2293 int j;
2295 /* We need to union stype over the incoming graph edges but we still
2296 want to limit recursion to stay O(N+E). */
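/* A node with refcnt N is reached over N graph edges; the first N - 1
   visits only contribute their incoming STYPE, while the last visit
   does the immediate-use scan and recurses into the children.  */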
2297 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
2299 /* Propagate hybrid down the SLP tree. */
2300 if (stype == hybrid)
2302 else if (HYBRID_SLP_STMT (stmt_vinfo))
2303 stype = hybrid;
2304 else if (!only_edge)
2306 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2307 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2308 /* If we get a pattern stmt here we have to use the LHS of the
2309 original stmt for immediate uses. */
2310 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2311 tree def;
2312 if (gimple_code (stmt) == GIMPLE_PHI)
2313 def = gimple_phi_result (stmt);
2314 else
2315 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2316 if (def)
2317 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2319 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2320 if (!use_vinfo)
2321 continue;
2322 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2323 if (!STMT_SLP_TYPE (use_vinfo)
2324 && (STMT_VINFO_RELEVANT (use_vinfo)
2325 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2326 && !(gimple_code (use_stmt) == GIMPLE_PHI
2327 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2329 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2331 "def in non-SLP stmt: %G", use_stmt);
2332 stype = hybrid;
2337 if (stype == hybrid
2338 && !HYBRID_SLP_STMT (stmt_vinfo))
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2342 stmt_vinfo->stmt);
2343 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2346 if (!only_edge)
2347 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2348 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2349 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2352 static void
2353 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2355 hash_map<slp_tree, unsigned> visited;
2356 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
2359 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2361 static tree
2362 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2364 walk_stmt_info *wi = (walk_stmt_info *)data;
2365 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2367 if (wi->is_lhs)
2368 return NULL_TREE;
2370 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2371 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2375 def_stmt_info->stmt);
2376 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2379 return NULL_TREE;
2382 static tree
2383 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2384 walk_stmt_info *wi)
2386 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2387 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2388 /* If the stmt is in a SLP instance then this isn't a reason
2389 to mark use definitions in other SLP instances as hybrid. */
2390 if (! STMT_SLP_TYPE (use_vinfo)
2391 && (STMT_VINFO_RELEVANT (use_vinfo)
2392 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2393 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2394 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2396 else
2397 *handled = true;
2398 return NULL_TREE;
2401 /* Find stmts that must be both vectorized and SLPed. */
2403 void
2404 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2406 unsigned int i;
2407 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2408 slp_instance instance;
2410 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2412 /* First walk all pattern stmts in the loop and mark the defs of their
2413 uses as hybrid because immediate uses in pattern stmts are not recorded. */
2414 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2416 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2417 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2418 gsi_next (&gsi))
2420 gimple *stmt = gsi_stmt (gsi);
2421 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2422 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2424 walk_stmt_info wi;
2425 memset (&wi, 0, sizeof (wi));
2426 wi.info = loop_vinfo;
2427 gimple_stmt_iterator gsi2
2428 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2429 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2430 vect_detect_hybrid_slp_1, &wi);
2431 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2432 vect_detect_hybrid_slp_2,
2433 vect_detect_hybrid_slp_1, &wi);
2438 /* Then walk the SLP instance trees marking stmts with uses in
2439 non-SLP stmts as hybrid, also propagating hybrid down the
2440 SLP tree, collecting the above info on-the-fly. */
2441 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2443 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2444 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2445 i, pure_slp);
2450 /* Initialize a bb_vec_info struct for the statements between
2451 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2453 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2454 gimple_stmt_iterator region_end_in,
2455 vec_info_shared *shared)
2456 : vec_info (vec_info::bb, init_cost (NULL), shared),
2457 bb (gsi_bb (region_begin_in)),
2458 region_begin (region_begin_in),
2459 region_end (region_end_in)
2461 gimple_stmt_iterator gsi;
2463 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2464 gsi_next (&gsi))
2466 gimple *stmt = gsi_stmt (gsi);
2467 gimple_set_uid (stmt, 0);
2468 add_stmt (stmt);
2471 bb->aux = this;
2475 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2476 stmts in the basic block. */
2478 _bb_vec_info::~_bb_vec_info ()
2480 for (gimple_stmt_iterator si = region_begin;
2481 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2482 /* Reset region marker. */
2483 gimple_set_uid (gsi_stmt (si), -1);
2485 bb->aux = NULL;
2488 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2489 given that the child nodes have already been processed, and that
2490 their def types currently match their SLP node's def type. */
2492 static bool
2493 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2494 slp_instance node_instance,
2495 stmt_vector_for_cost *cost_vec)
2497 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2498 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2500 /* For BB vectorization vector types are assigned here.
2501 Memory accesses already got their vector type assigned
2502 in vect_analyze_data_refs. */
2503 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2504 if (bb_vinfo
2505 && ! STMT_VINFO_DATA_REF (stmt_info))
2507 tree vectype, nunits_vectype;
2508 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2509 &nunits_vectype))
2510 /* We checked this when building the node. */
2511 gcc_unreachable ();
2512 if (vectype == boolean_type_node)
2514 vectype = vect_get_mask_type_for_stmt (stmt_info);
2515 if (!vectype)
2516 /* vect_get_mask_type_for_stmt has already explained the
2517 failure. */
2518 return false;
2521 stmt_vec_info sstmt_info;
2522 unsigned int i;
2523 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2524 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2527 /* Calculate the number of vector statements to be created for the
2528 scalar stmts in this node. For SLP reductions it is equal to the
2529 number of vector statements in the children (which has already been
2530 calculated by the recursive call). Otherwise it is the number of
2531 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2532 VF divided by the number of elements in a vector. */
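/* For example, a store group of four ints with VF == 8 and V4SI vectors
   needs vect_get_num_vectors (8 * 4, V4SI) == 8 vector statements.  */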
2533 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2534 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2535 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2536 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2537 else
2539 poly_uint64 vf;
2540 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2541 vf = loop_vinfo->vectorization_factor;
2542 else
2543 vf = 1;
2544 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2545 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2546 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2547 = vect_get_num_vectors (vf * group_size, vectype);
2550 bool dummy;
2551 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2554 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2555 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2557 Return true if the operations are supported. */
2559 static bool
2560 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2561 slp_instance node_instance,
2562 scalar_stmts_to_slp_tree_map_t *visited,
2563 scalar_stmts_to_slp_tree_map_t *lvisited,
2564 stmt_vector_for_cost *cost_vec)
2566 int i, j;
2567 slp_tree child;
2569 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2570 return true;
2572 /* If we already analyzed the exact same set of scalar stmts we're done.
2573 We share the generated vector stmts for those. */
2574 slp_tree *leader;
2575 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2576 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2578 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2579 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2580 return true;
2583 /* The SLP graph is acyclic, so not caching whether we failed or succeeded
2584 doesn't result in any issue since we throw away the lvisited set
2585 when we fail. */
2586 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2588 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2589 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2590 visited, lvisited, cost_vec))
2591 return false;
2593 /* ??? We have to catch the case late where two first scalar stmts appear
2594 in multiple SLP children with different def type and fail. Remember
2595 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2596 match it when that is vect_internal_def. */
2597 auto_vec<vect_def_type, 4> dt;
2598 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2599 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2600 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2602 /* Push SLP node def-type to stmt operands. */
2603 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2604 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2605 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2606 = SLP_TREE_DEF_TYPE (child);
2608 /* Check everything worked out. */
2609 bool res = true;
2610 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2611 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2613 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2614 != SLP_TREE_DEF_TYPE (child))
2615 res = false;
2617 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
2618 res = false;
2619 if (!res && dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2621 "not vectorized: same operand with different "
2622 "def type in stmt.\n");
2624 if (res)
2625 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2626 cost_vec);
2628 /* Restore def-types. */
2629 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2630 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2632 return res;
2636 /* Analyze statements in SLP instances of VINFO. Return true if the
2637 operations are supported. */
2639 bool
2640 vect_slp_analyze_operations (vec_info *vinfo)
2642 slp_instance instance;
2643 int i;
2645 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2647 scalar_stmts_to_slp_tree_map_t *visited
2648 = new scalar_stmts_to_slp_tree_map_t ();
2649 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2651 scalar_stmts_to_slp_tree_map_t lvisited;
2652 stmt_vector_for_cost cost_vec;
2653 cost_vec.create (2);
2654 if (!vect_slp_analyze_node_operations (vinfo,
2655 SLP_INSTANCE_TREE (instance),
2656 instance, visited, &lvisited,
2657 &cost_vec))
2659 slp_tree node = SLP_INSTANCE_TREE (instance);
2660 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2661 if (dump_enabled_p ())
2662 dump_printf_loc (MSG_NOTE, vect_location,
2663 "removing SLP instance operations starting from: %G",
2664 stmt_info->stmt);
2665 vect_free_slp_instance (instance, false);
2666 vinfo->slp_instances.ordered_remove (i);
2667 cost_vec.release ();
2669 else
2671 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2672 x != lvisited.end(); ++x)
2673 visited->put ((*x).first.copy (), (*x).second);
2674 i++;
2676 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2677 cost_vec.release ();
2680 delete visited;
2682 return !vinfo->slp_instances.is_empty ();
2686 /* Compute the scalar cost of the SLP node NODE and its children
2687 and record it in COST_VEC. Do not account defs that are marked in LIFE and
2688 update LIFE according to uses of NODE. */
2690 static void
2691 vect_bb_slp_scalar_cost (basic_block bb,
2692 slp_tree node, vec<bool, va_heap> *life,
2693 stmt_vector_for_cost *cost_vec,
2694 hash_set<slp_tree> &visited)
2696 unsigned i;
2697 stmt_vec_info stmt_info;
2698 slp_tree child;
2700 if (visited.add (node))
2701 return;
2703 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2705 gimple *stmt = stmt_info->stmt;
2706 vec_info *vinfo = stmt_info->vinfo;
2707 ssa_op_iter op_iter;
2708 def_operand_p def_p;
2710 if ((*life)[i])
2711 continue;
2713 /* If there is a non-vectorized use of the defs then the scalar
2714 stmt is kept live in which case we do not account it or any
2715 required defs in the SLP children in the scalar cost. This
2716 way we make the vectorization more costly when compared to
2717 the scalar cost. */
2718 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2720 imm_use_iterator use_iter;
2721 gimple *use_stmt;
2722 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2723 if (!is_gimple_debug (use_stmt))
2725 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2726 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2728 (*life)[i] = true;
2729 BREAK_FROM_IMM_USE_STMT (use_iter);
2733 if ((*life)[i])
2734 continue;
2736 /* Count scalar stmts only once. */
2737 if (gimple_visited_p (stmt))
2738 continue;
2739 gimple_set_visited (stmt, true);
2741 vect_cost_for_stmt kind;
2742 if (STMT_VINFO_DATA_REF (stmt_info))
2744 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2745 kind = scalar_load;
2746 else
2747 kind = scalar_store;
2749 else
2750 kind = scalar_stmt;
2751 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2754 auto_vec<bool, 20> subtree_life;
2755 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2757 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2759 /* Do not directly pass LIFE to the recursive call, copy it to
2760 confine changes in the callee to the current child/subtree. */
2761 subtree_life.safe_splice (*life);
2762 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2763 visited);
2764 subtree_life.truncate (0);
2769 static void
2770 vect_bb_slp_scalar_cost (basic_block bb,
2771 slp_tree node, vec<bool, va_heap> *life,
2772 stmt_vector_for_cost *cost_vec)
2774 hash_set<slp_tree> visited;
2775 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2778 /* Check if vectorization of the basic block is profitable. */
2780 static bool
2781 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2783 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2784 slp_instance instance;
2785 int i;
2786 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2787 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2789 /* Calculate scalar cost. */
2790 stmt_vector_for_cost scalar_costs;
2791 scalar_costs.create (0);
2792 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2794 auto_vec<bool, 20> life;
2795 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2796 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2797 SLP_INSTANCE_TREE (instance),
2798 &life, &scalar_costs);
2800 void *target_cost_data = init_cost (NULL);
2801 add_stmt_costs (target_cost_data, &scalar_costs);
2802 scalar_costs.release ();
2803 unsigned dummy;
2804 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2805 destroy_cost_data (target_cost_data);
2807 /* Unset visited flag. */
2808 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2809 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2810 gimple_set_visited (gsi_stmt (gsi), false);
2812 /* Complete the target-specific cost calculation. */
2813 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2814 &vec_inside_cost, &vec_epilogue_cost);
2816 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2818 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2821 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2822 vec_inside_cost);
2823 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2824 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2825 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2828 /* Vectorization is profitable if its cost is no more than the cost of the
2829 scalar version. Note that we err on the vector side for equal cost because
2830 the cost estimate is otherwise quite pessimistic (constant uses are
2831 free on the scalar side but cost a load on the vector side for
2832 example). */
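/* E.g. with vec_prologue_cost 2, vec_inside_cost 9, vec_epilogue_cost 1
   and scalar_cost 12 the costs tie at 12 and we still vectorize.  */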
2833 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2834 return false;
2836 return true;
2839 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2840 if so and sets fatal to true if failure is independent of
2841 current_vector_size. */
2843 static bb_vec_info
2844 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2845 gimple_stmt_iterator region_end,
2846 vec<data_reference_p> datarefs, int n_stmts,
2847 bool &fatal, vec_info_shared *shared)
2849 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2851 bb_vec_info bb_vinfo;
2852 slp_instance instance;
2853 int i;
2854 poly_uint64 min_vf = 2;
2856 /* The first group of checks is independent of the vector size. */
2857 fatal = true;
2859 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2861 if (dump_enabled_p ())
2862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2863 "not vectorized: too many instructions in "
2864 "basic block.\n");
2865 free_data_refs (datarefs);
2866 return NULL;
2869 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2870 if (!bb_vinfo)
2871 return NULL;
2873 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2874 bb_vinfo->shared->save_datarefs ();
2876 /* Analyze the data references. */
2878 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
2880 if (dump_enabled_p ())
2881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2882 "not vectorized: unhandled data-ref in basic "
2883 "block.\n");
2885 delete bb_vinfo;
2886 return NULL;
2889 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2893 "not vectorized: not enough data-refs in "
2894 "basic block.\n");
2896 delete bb_vinfo;
2897 return NULL;
2900 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2902 if (dump_enabled_p ())
2903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2904 "not vectorized: unhandled data access in "
2905 "basic block.\n");
2907 delete bb_vinfo;
2908 return NULL;
2911 /* If there are no grouped stores in the region there is no need
2912 to continue with pattern recog as vect_analyze_slp will fail
2913 anyway. */
2914 if (bb_vinfo->grouped_stores.is_empty ())
2916 if (dump_enabled_p ())
2917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2918 "not vectorized: no grouped stores in "
2919 "basic block.\n");
2921 delete bb_vinfo;
2922 return NULL;
2925 /* The rest of the analysis below depends on the vector size in some way. */
2926 fatal = false;
2928 vect_pattern_recog (bb_vinfo);
2930 /* Check the SLP opportunities in the basic block, analyze and build SLP
2931 trees. */
2932 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2934 if (dump_enabled_p ())
2936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2937 "Failed to SLP the basic block.\n");
2938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2939 "not vectorized: failed to find SLP opportunities "
2940 "in basic block.\n");
2943 delete bb_vinfo;
2944 return NULL;
2947 vect_record_base_alignments (bb_vinfo);
2949 /* Analyze and verify the alignment of data references and the
2950 dependence in the SLP instances. */
2951 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2953 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2954 || ! vect_slp_analyze_instance_dependence (instance))
2956 slp_tree node = SLP_INSTANCE_TREE (instance);
2957 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2958 if (dump_enabled_p ())
2959 dump_printf_loc (MSG_NOTE, vect_location,
2960 "removing SLP instance operations starting from: %G",
2961 stmt_info->stmt);
2962 vect_free_slp_instance (instance, false);
2963 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2964 continue;
2967 /* Mark all the statements that we want to vectorize as pure SLP and
2968 relevant. */
2969 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2970 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2972 i++;
2974 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2976 delete bb_vinfo;
2977 return NULL;
2980 if (!vect_slp_analyze_operations (bb_vinfo))
2982 if (dump_enabled_p ())
2983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2984 "not vectorized: bad operation in basic block.\n");
2986 delete bb_vinfo;
2987 return NULL;
2990 /* Cost model: check if the vectorization is worthwhile. */
2991 if (!unlimited_cost_model (NULL)
2992 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2994 if (dump_enabled_p ())
2995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2996 "not vectorized: vectorization is not "
2997 "profitable.\n");
2999 delete bb_vinfo;
3000 return NULL;
3003 if (dump_enabled_p ())
3004 dump_printf_loc (MSG_NOTE, vect_location,
3005 "Basic block will be vectorized using SLP\n");
3007 return bb_vinfo;
3011 /* Main entry for the BB vectorizer. Analyze and transform BB; return
3012 true if anything in the basic-block was vectorized. */
3014 bool
3015 vect_slp_bb (basic_block bb)
3017 bb_vec_info bb_vinfo;
3018 gimple_stmt_iterator gsi;
3019 bool any_vectorized = false;
3020 auto_vector_sizes vector_sizes;
3022 /* Autodetect first vector size we try. */
3023 current_vector_size = 0;
3024 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
3025 unsigned int next_size = 0;
3027 gsi = gsi_start_bb (bb);
3029 poly_uint64 autodetected_vector_size = 0;
3030 while (1)
3032 if (gsi_end_p (gsi))
3033 break;
3035 gimple_stmt_iterator region_begin = gsi;
3036 vec<data_reference_p> datarefs = vNULL;
3037 int insns = 0;
3039 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3041 gimple *stmt = gsi_stmt (gsi);
3042 if (is_gimple_debug (stmt))
3043 continue;
3044 insns++;
3046 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3047 vect_location = stmt;
3049 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3050 break;
3053 /* Skip leading unhandled stmts. */
3054 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3056 gsi_next (&gsi);
3057 continue;
3060 gimple_stmt_iterator region_end = gsi;
3062 bool vectorized = false;
3063 bool fatal = false;
3064 vec_info_shared shared;
3065 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3066 datarefs, insns, fatal, &shared);
3067 if (bb_vinfo
3068 && dbg_cnt (vect_slp))
3070 if (dump_enabled_p ())
3071 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3073 bb_vinfo->shared->check_datarefs ();
3074 vect_schedule_slp (bb_vinfo);
3076 unsigned HOST_WIDE_INT bytes;
3077 if (dump_enabled_p ())
3079 if (current_vector_size.is_constant (&bytes))
3080 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3081 "basic block part vectorized using %wu byte "
3082 "vectors\n", bytes);
3083 else
3084 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3085 "basic block part vectorized using variable "
3086 "length vectors\n");
3089 vectorized = true;
3091 delete bb_vinfo;
3093 any_vectorized |= vectorized;
3095 if (next_size == 0)
3096 autodetected_vector_size = current_vector_size;
3098 if (next_size < vector_sizes.length ()
3099 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3100 next_size += 1;
3102 if (vectorized
3103 || next_size == vector_sizes.length ()
3104 || known_eq (current_vector_size, 0U)
3105 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3106 vector sizes will fail do not bother iterating. */
3107 || fatal)
3109 if (gsi_end_p (region_end))
3110 break;
3112 /* Skip the unhandled stmt. */
3113 gsi_next (&gsi);
3115 /* And reset vector sizes. */
3116 current_vector_size = 0;
3117 next_size = 0;
3119 else
3121 /* Try the next biggest vector size. */
3122 current_vector_size = vector_sizes[next_size++];
3123 if (dump_enabled_p ())
3125 dump_printf_loc (MSG_NOTE, vect_location,
3126 "***** Re-trying analysis with "
3127 "vector size ");
3128 dump_dec (MSG_NOTE, current_vector_size);
3129 dump_printf (MSG_NOTE, "\n");
3132 /* Start over. */
3133 gsi = region_begin;
3137 return any_vectorized;
3141 /* Return true if a constant operand of STMT_VINFO needs a boolean vector type. */
3143 static bool
3144 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
3146 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3147 tree op, vectype;
3148 enum vect_def_type dt;
3150 /* For comparison and COND_EXPR type is chosen depending
3151 on the non-constant other comparison operand. */
3152 if (TREE_CODE_CLASS (code) == tcc_comparison)
3154 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3155 op = gimple_assign_rhs1 (stmt);
3157 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3158 gcc_unreachable ();
3160 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3163 if (code == COND_EXPR)
3165 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3166 tree cond = gimple_assign_rhs1 (stmt);
3168 if (TREE_CODE (cond) == SSA_NAME)
3169 op = cond;
3170 else
3171 op = TREE_OPERAND (cond, 0);
3173 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3174 gcc_unreachable ();
3176 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3179 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3182 /* Build a variable-length vector in which the elements in ELTS are repeated
3183 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3184 RESULTS and add any new instructions to SEQ.
3186 The approach we use is:
3188 (1) Find a vector mode VM with integer elements of mode IM.
3190 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3191 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3192 from small vectors to IM.
3194 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3196 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3197 correct byte contents.
3199 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3201 We try to find the largest IM for which this sequence works, in order
3202 to cut down on the number of interleaves. */
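/* A sketch of the steps, assuming NVECTORS == 2 and a 64-bit integer
   mode for IM: for ELTS = { a, b, c, d } of 32-bit elements, step (2)
   forms two IM values A = { a, b } and B = { c, d } via
   VIEW_CONVERT_EXPR, step (3) duplicates them into { A, A, ... } and
   { B, B, ... }, step (4) interleaves those into { A, B, A, B, ... },
   and step (5) view-converts the result back, so each output vector
   holds the repeating sequence { a, b, c, d, a, b, c, d, ... }.  */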
3204 void
3205 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3206 unsigned int nresults, vec<tree> &results)
3208 unsigned int nelts = elts.length ();
3209 tree element_type = TREE_TYPE (vector_type);
3211 /* (1) Find a vector mode VM with integer elements of mode IM. */
3212 unsigned int nvectors = 1;
3213 tree new_vector_type;
3214 tree permutes[2];
3215 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3216 &nvectors, &new_vector_type,
3217 permutes))
3218 gcc_unreachable ();
3220 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3221 unsigned int partial_nelts = nelts / nvectors;
3222 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3224 tree_vector_builder partial_elts;
3225 auto_vec<tree, 32> pieces (nvectors * 2);
3226 pieces.quick_grow (nvectors * 2);
3227 for (unsigned int i = 0; i < nvectors; ++i)
3229 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3230 ELTS' has mode IM. */
3231 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3232 for (unsigned int j = 0; j < partial_nelts; ++j)
3233 partial_elts.quick_push (elts[i * partial_nelts + j]);
3234 tree t = gimple_build_vector (seq, &partial_elts);
3235 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3236 TREE_TYPE (new_vector_type), t);
3238 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3239 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3242 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3243 correct byte contents.
3245 We need to repeat the following operation log2(nvectors) times:
3247 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3248 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3250 However, if each input repeats every N elements and the VF is
3251 a multiple of N * 2, the HI result is the same as the LO. */
3252 unsigned int in_start = 0;
3253 unsigned int out_start = nvectors;
3254 unsigned int hi_start = nvectors / 2;
3255 /* A bound on the number of outputs needed to produce NRESULTS results
3256 in the final iteration. */
3257 unsigned int noutputs_bound = nvectors * nresults;
3258 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3260 noutputs_bound /= 2;
3261 unsigned int limit = MIN (noutputs_bound, nvectors);
3262 for (unsigned int i = 0; i < limit; ++i)
3264 if ((i & 1) != 0
3265 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3266 2 * in_repeat))
3268 pieces[out_start + i] = pieces[out_start + i - 1];
3269 continue;
3272 tree output = make_ssa_name (new_vector_type);
3273 tree input1 = pieces[in_start + (i / 2)];
3274 tree input2 = pieces[in_start + (i / 2) + hi_start];
3275 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3276 input1, input2,
3277 permutes[i & 1]);
3278 gimple_seq_add_stmt (seq, stmt);
3279 pieces[out_start + i] = output;
3281 std::swap (in_start, out_start);
3284 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3285 results.reserve (nresults);
3286 for (unsigned int i = 0; i < nresults; ++i)
3287 if (i < nvectors)
3288 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3289 pieces[in_start + i]));
3290 else
3291 results.quick_push (results[i - nvectors]);
3295 /* For constant and loop invariant defs of SLP_NODE this function returns
3296 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3297 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3298 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3302 static void
3303 vect_get_constant_vectors (tree op, slp_tree slp_node,
3304 vec<tree> *vec_oprnds,
3305 unsigned int op_num, unsigned int number_of_vectors)
3307 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3308 stmt_vec_info stmt_vinfo = stmts[0];
3309 gimple *stmt = stmt_vinfo->stmt;
3310 unsigned HOST_WIDE_INT nunits;
3311 tree vec_cst;
3312 unsigned j, number_of_places_left_in_vector;
3313 tree vector_type;
3314 tree vop;
3315 int group_size = stmts.length ();
3316 unsigned int vec_num, i;
3317 unsigned number_of_copies = 1;
3318 vec<tree> voprnds;
3319 voprnds.create (number_of_vectors);
3320 bool constant_p, is_store;
3321 tree neutral_op = NULL;
3322 enum tree_code code = gimple_expr_code (stmt);
3323 gimple_seq ctor_seq = NULL;
3324 auto_vec<tree, 16> permute_results;
3326 /* Check if vector type is a boolean vector. */
3327 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3328 && vect_mask_constant_operand_p (stmt_vinfo))
3329 vector_type
3330 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3331 else
3332 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3334 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3336 is_store = true;
3337 op = gimple_assign_rhs1 (stmt);
3339 else
3340 is_store = false;
3342 gcc_assert (op);
3344 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3345 created vectors. It is greater than 1 if unrolling is performed.
3347 For example, we have two scalar operands, s1 and s2 (e.g., group of
3348 strided accesses of size two), while NUNITS is four (i.e., four scalars
3349 of this type can be packed in a vector). The output vector will contain
3350 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3351 will be 2).
3353 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3354 containing the operands.
3356 For example, NUNITS is four as before, and the group size is 8
3357 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3358 {s5, s6, s7, s8}. */
3360 /* When using duplicate_and_interleave, we just need one element for
3361 each scalar statement. */
3362 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3363 nunits = group_size;
3365 number_of_copies = nunits * number_of_vectors / group_size;
3367 number_of_places_left_in_vector = nunits;
3368 constant_p = true;
3369 tree_vector_builder elts (vector_type, nunits, 1);
3370 elts.quick_grow (nunits);
3371 bool place_after_defs = false;
3372 for (j = 0; j < number_of_copies; j++)
3374 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3376 stmt = stmt_vinfo->stmt;
3377 if (is_store)
3378 op = gimple_assign_rhs1 (stmt);
3379 else
3381 switch (code)
3383 case COND_EXPR:
3385 tree cond = gimple_assign_rhs1 (stmt);
3386 if (TREE_CODE (cond) == SSA_NAME)
3387 op = gimple_op (stmt, op_num + 1);
3388 else if (op_num == 0 || op_num == 1)
3389 op = TREE_OPERAND (cond, op_num);
3390 else
3392 if (op_num == 2)
3393 op = gimple_assign_rhs2 (stmt);
3394 else
3395 op = gimple_assign_rhs3 (stmt);
3398 break;
3400 case CALL_EXPR:
3401 op = gimple_call_arg (stmt, op_num);
3402 break;
3404 case LSHIFT_EXPR:
3405 case RSHIFT_EXPR:
3406 case LROTATE_EXPR:
3407 case RROTATE_EXPR:
3408 op = gimple_op (stmt, op_num + 1);
3409 /* Unlike the other binary operators, shifts/rotates have
3410 the shift count being int, instead of the same type as
3411 the lhs, so make sure the scalar is the right type if
3412 we are dealing with vectors of
3413 long long/long/short/char. */
3414 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3415 op = fold_convert (TREE_TYPE (vector_type), op);
3416 break;
3418 default:
3419 op = gimple_op (stmt, op_num + 1);
3420 break;
3424 /* Create 'vect_ = {op0,op1,...,opn}'. */
3425 number_of_places_left_in_vector--;
3426 tree orig_op = op;
3427 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3429 if (CONSTANT_CLASS_P (op))
3431 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3433 /* Can't use VIEW_CONVERT_EXPR for booleans because
3434 of possibly different sizes of scalar value and
3435 vector element. */
3436 if (integer_zerop (op))
3437 op = build_int_cst (TREE_TYPE (vector_type), 0);
3438 else if (integer_onep (op))
3439 op = build_all_ones_cst (TREE_TYPE (vector_type));
3440 else
3441 gcc_unreachable ();
3443 else
3444 op = fold_unary (VIEW_CONVERT_EXPR,
3445 TREE_TYPE (vector_type), op);
3446 gcc_assert (op && CONSTANT_CLASS_P (op));
3448 else
3450 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3451 gimple *init_stmt;
3452 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3454 tree true_val
3455 = build_all_ones_cst (TREE_TYPE (vector_type));
3456 tree false_val
3457 = build_zero_cst (TREE_TYPE (vector_type));
3458 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3459 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3460 op, true_val,
3461 false_val);
3463 else
3465 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3466 op);
3467 init_stmt
3468 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3469 op);
3471 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3472 op = new_temp;
3475 elts[number_of_places_left_in_vector] = op;
3476 if (!CONSTANT_CLASS_P (op))
3477 constant_p = false;
3478 if (TREE_CODE (orig_op) == SSA_NAME
3479 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3480 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3481 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3482 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3483 place_after_defs = true;
3485 if (number_of_places_left_in_vector == 0)
3487 if (constant_p
3488 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3489 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3490 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3491 else
3493 if (vec_oprnds->is_empty ())
3494 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3495 number_of_vectors,
3496 permute_results);
3497 vec_cst = permute_results[number_of_vectors - j - 1];
3499 tree init;
3500 gimple_stmt_iterator gsi;
3501 if (place_after_defs)
3503 stmt_vec_info last_stmt_info
3504 = vect_find_last_scalar_stmt_in_slp (slp_node);
3505 gsi = gsi_for_stmt (last_stmt_info->stmt);
3506 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3507 &gsi);
3509 else
3510 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3511 NULL);
3512 if (ctor_seq != NULL)
3514 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3515 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3516 ctor_seq = NULL;
3518 voprnds.quick_push (init);
3519 place_after_defs = false;
3520 number_of_places_left_in_vector = nunits;
3521 constant_p = true;
3522 elts.new_vector (vector_type, nunits, 1);
3523 elts.quick_grow (nunits);
3528 /* Since the vectors are created in the reverse order, we should invert
3529 them. */
3530 vec_num = voprnds.length ();
3531 for (j = vec_num; j != 0; j--)
3533 vop = voprnds[j - 1];
3534 vec_oprnds->quick_push (vop);
3537 voprnds.release ();
3539 /* If VF is greater than the unrolling factor needed for the SLP
3540 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3541 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3542 to replicate the vectors. */
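/* E.g. if two vector defs were built but four are needed and there is
   no neutral value, the two existing defs are pushed again.  */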
3543 while (number_of_vectors > vec_oprnds->length ())
3545 tree neutral_vec = NULL;
3547 if (neutral_op)
3549 if (!neutral_vec)
3550 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3552 vec_oprnds->quick_push (neutral_vec);
3554 else
3556 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3557 vec_oprnds->quick_push (vop);
3563 /* Get vectorized definitions from SLP_NODE that contains corresponding
3564 vectorized def-stmts. */
3566 static void
3567 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3569 tree vec_oprnd;
3570 stmt_vec_info vec_def_stmt_info;
3571 unsigned int i;
3573 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3575 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3577 gcc_assert (vec_def_stmt_info);
3578 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3579 vec_oprnd = gimple_phi_result (vec_def_phi);
3580 else
3581 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3582 vec_oprnds->quick_push (vec_oprnd);
3587 /* Get vectorized definitions for SLP_NODE.
3588 If the scalar definitions are loop invariants or constants, collect them and
3589 call vect_get_constant_vectors() to create vector stmts.
3590 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3591 must be stored in the corresponding child of SLP_NODE, and we call
3592 vect_get_slp_vect_defs () to retrieve them. */
3594 void
3595 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3596 vec<vec<tree> > *vec_oprnds)
3598 int number_of_vects = 0, i;
3599 unsigned int child_index = 0;
3600 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3601 slp_tree child = NULL;
3602 vec<tree> vec_defs;
3603 tree oprnd;
3604 bool vectorized_defs;
3606 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3607 FOR_EACH_VEC_ELT (ops, i, oprnd)
3609 /* For each operand we check if it has vectorized definitions in a child
3610 node or we need to create them (for invariants and constants). We
3611 check if the LHS of the first stmt of the next child matches OPRND.
3612 If it does, we found the correct child. Otherwise, we call
3613 vect_get_constant_vectors () and do not advance CHILD_INDEX, in order
3614 to check this child node for the next operand. */
3615 vectorized_defs = false;
3616 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3618 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3620 /* We have to check both pattern and original def, if available. */
3621 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3623 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3624 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3625 tree first_def_op;
3627 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3628 first_def_op = gimple_phi_result (first_def);
3629 else
3630 first_def_op = gimple_get_lhs (first_def_info->stmt);
3631 if (operand_equal_p (oprnd, first_def_op, 0)
3632 || (related
3633 && operand_equal_p (oprnd,
3634 gimple_get_lhs (related->stmt), 0)))
3636 /* The number of vector defs is determined by the number of
3637 vector statements in the node from which we get those
3638 statements. */
3639 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3640 vectorized_defs = true;
3641 child_index++;
3644 else
3645 child_index++;
3648 if (!vectorized_defs)
3650 if (i == 0)
3652 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3653 /* Number of vector stmts was calculated according to LHS in
3654 vect_schedule_slp_instance (), fix it by replacing LHS with
3655 RHS, if necessary. See vect_get_smallest_scalar_type () for
3656 details. */
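/* For example, when the stored values are shorts converted from ints,
   one vector of short results corresponds to two vectors of int
   operands, so NUMBER_OF_VECTS is scaled by 4 / 2 here.  */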
3657 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3658 &rhs_size_unit);
3659 if (rhs_size_unit != lhs_size_unit)
3661 number_of_vects *= rhs_size_unit;
3662 number_of_vects /= lhs_size_unit;
3667 /* Allocate memory for vectorized defs. */
3668 vec_defs = vNULL;
3669 vec_defs.create (number_of_vects);
3671 /* For reduction defs we call vect_get_constant_vectors (), since we are
3672 looking for initial loop invariant values. */
3673 if (vectorized_defs)
3674 /* The defs are already vectorized. */
3675 vect_get_slp_vect_defs (child, &vec_defs);
3676 else
3677 /* Build vectors from scalar defs. */
3678 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3679 number_of_vects);
3681 vec_oprnds->quick_push (vec_defs);
3685 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3686 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3687 permute statements for the SLP node NODE of the SLP instance
3688 SLP_NODE_INSTANCE. */
3690 bool
3691 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3692 gimple_stmt_iterator *gsi, poly_uint64 vf,
3693 slp_instance slp_node_instance, bool analyze_only,
3694 unsigned *n_perms)
3696 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3697 vec_info *vinfo = stmt_info->vinfo;
3698 int vec_index = 0;
3699 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3700 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3701 unsigned int mask_element;
3702 machine_mode mode;
3704 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3705 return false;
3707 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3709 mode = TYPE_MODE (vectype);
3710 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3712 /* Initialize the vect stmts of NODE to properly insert the generated
3713 stmts later. */
3714 if (! analyze_only)
3715 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3716 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3717 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3719 /* Generate permutation masks for every NODE. Number of masks for each NODE
3720 is equal to GROUP_SIZE.
3721 E.g., we have a group of three nodes with three loads from the same
3722 location in each node, and the vector size is 4. I.e., we have an
3723 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3724 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3725 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3728 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3729 The last mask is illegal since we assume two operands for the permute
3730 operation, and the mask element values can't be outside that range.
3731 Hence, the last mask must be converted into {2,5,5,5}.
3732 For the first two permutations we need the first and the second input
3733 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3734 we need the second and the third vectors: {b1,c1,a2,b2} and
3735 {c2,a3,b3,c3}. */
3737 int vect_stmts_counter = 0;
3738 unsigned int index = 0;
3739 int first_vec_index = -1;
3740 int second_vec_index = -1;
3741 bool noop_p = true;
3742 *n_perms = 0;
3744 vec_perm_builder mask;
3745 unsigned int nelts_to_build;
3746 unsigned int nvectors_per_build;
3747 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3748 && multiple_p (nunits, group_size));
3749 if (repeating_p)
3751 /* A single vector contains a whole number of copies of the node, so:
3752 (a) all permutes can use the same mask; and
3753 (b) the permutes only need a single vector input. */
3754 mask.new_vector (nunits, group_size, 3);
3755 nelts_to_build = mask.encoded_nelts ();
3756 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3758 else
3760 /* We need to construct a separate mask for each vector statement. */
3761 unsigned HOST_WIDE_INT const_nunits, const_vf;
3762 if (!nunits.is_constant (&const_nunits)
3763 || !vf.is_constant (&const_vf))
3764 return false;
3765 mask.new_vector (const_nunits, const_nunits, 1);
3766 nelts_to_build = const_vf * group_size;
3767 nvectors_per_build = 1;
3770 unsigned int count = mask.encoded_nelts ();
3771 mask.quick_grow (count);
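/* Added note: COUNT is the number of elements encoded in one mask;
   when !repeating_p, NELTS_TO_BUILD spans all masks and a mask is
   emitted each time INDEX reaches COUNT below.  */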
3772 vec_perm_indices indices;
3774 for (unsigned int j = 0; j < nelts_to_build; j++)
3776 unsigned int iter_num = j / group_size;
3777 unsigned int stmt_num = j % group_size;
3778 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3779 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3780 if (repeating_p)
3782 first_vec_index = 0;
3783 mask_element = i;
3785 else
3787 /* Enforced before the loop when !repeating_p. */
3788 unsigned int const_nunits = nunits.to_constant ();
3789 vec_index = i / const_nunits;
3790 mask_element = i % const_nunits;
3791 if (vec_index == first_vec_index
3792 || first_vec_index == -1)
3794 first_vec_index = vec_index;
3796 else if (vec_index == second_vec_index
3797 || second_vec_index == -1)
3799 second_vec_index = vec_index;
3800 mask_element += const_nunits;
3802 else
3804 if (dump_enabled_p ())
3805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3806 "permutation requires at "
3807 "least three vectors %G",
3808 stmt_info->stmt);
3809 gcc_assert (analyze_only);
3810 return false;
3813 gcc_assert (mask_element < 2 * const_nunits);
3816 if (mask_element != index)
3817 noop_p = false;
3818 mask[index++] = mask_element;
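/* Added note: a complete mask has been built; unless it is a no-op,
   verify that the target can perform it as a constant permutation
   and count it.  */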
3820 if (index == count && !noop_p)
3822 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3823 if (!can_vec_perm_const_p (mode, indices))
3825 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3828 vect_location,
3829 "unsupported vect permute { ");
3830 for (i = 0; i < count; ++i)
3832 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3833 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3835 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3837 gcc_assert (analyze_only);
3838 return false;
3841 ++*n_perms;
3844 if (index == count)
3846 if (!analyze_only)
3848 tree mask_vec = NULL_TREE;
3850 if (! noop_p)
3851 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3853 if (second_vec_index == -1)
3854 second_vec_index = first_vec_index;
3856 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3858 /* Generate the permute statement if necessary. */
3859 tree first_vec = dr_chain[first_vec_index + ri];
3860 tree second_vec = dr_chain[second_vec_index + ri];
3861 stmt_vec_info perm_stmt_info;
3862 if (! noop_p)
3864 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3865 tree perm_dest
3866 = vect_create_destination_var (gimple_assign_lhs (stmt),
3867 vectype);
3868 perm_dest = make_ssa_name (perm_dest);
3869 gassign *perm_stmt
3870 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3871 first_vec, second_vec,
3872 mask_vec);
3873 perm_stmt_info
3874 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3875 gsi);
3877 else
3878 /* If the mask was NULL_TREE the permutation is a no-op;
3879 reuse the statement that defines FIRST_VEC instead. */
3880 perm_stmt_info = vinfo->lookup_def (first_vec);
3882 /* Store the vector statement in NODE. */
3883 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3884 = perm_stmt_info;
3888 index = 0;
3889 first_vec_index = -1;
3890 second_vec_index = -1;
3891 noop_p = true;
3895 return true;
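/* Illustration only, not part of GCC: a standalone sketch reproducing
   the mask arithmetic above for the "a" loads of the example in the
   comment (DR_GROUP_SIZE == 3, four elements per vector, load
   permutation {0,0,0}).  All names below are hypothetical locals, and
   the "third distinct input" failure path is only noted in a comment.  */
#if 0
#include <cstdio>

int
main ()
{
  const unsigned dr_group_size = 3;   /* a, b, c interleaved in memory */
  const unsigned group_size = 3;      /* scalar loads in the node */
  const unsigned nunits = 4;          /* elements per vector */
  const unsigned load_perm[3] = { 0, 0, 0 };

  int first_vec = -1, second_vec = -1;
  unsigned index = 0, mask[4];

  for (unsigned j = 0; j < 3 * nunits; j++)
    {
      unsigned iter_num = j / group_size;
      unsigned stmt_num = j % group_size;
      /* Position of the desired element in the loaded sequence.  */
      unsigned i = iter_num * dr_group_size + load_perm[stmt_num];
      unsigned vec_index = i / nunits;
      unsigned mask_element = i % nunits;
      if (first_vec == -1 || (int) vec_index == first_vec)
        first_vec = vec_index;
      else
        {
          /* Element from the second input vector; rebase its index past
             the first vector.  A third distinct input would make the
             permutation invalid, as diagnosed above.  */
          second_vec = vec_index;
          mask_element += nunits;
        }
      mask[index++] = mask_element;
      if (index == nunits)
        {
          if (second_vec == -1)
            second_vec = first_vec;
          printf ("vectors (%d,%d) mask {%u,%u,%u,%u}\n",
                  first_vec, second_vec,
                  mask[0], mask[1], mask[2], mask[3]);
          index = 0;
          first_vec = second_vec = -1;
        }
    }
  /* Prints:
       vectors (0,0) mask {0,0,0,3}
       vectors (0,1) mask {3,3,6,6}
       vectors (1,2) mask {2,5,5,5}
     matching the masks, including the converted last one, described
     in the comment above.  */
  return 0;
}
#endif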
3898 /* Vectorize SLP instance tree in postorder. */
3900 static void
3901 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3902 scalar_stmts_to_slp_tree_map_t *bst_map)
3904 gimple_stmt_iterator si;
3905 stmt_vec_info stmt_info;
3906 unsigned int group_size;
3907 tree vectype;
3908 int i, j;
3909 slp_tree child;
3911 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3912 return;
3914 /* See if we have already vectorized the node in the graph of the
3915 SLP instance. */
3916 if (SLP_TREE_VEC_STMTS (node).exists ())
3917 return;
3919 /* See if we have already vectorized the same set of stmts and reuse their
3920 vectorized stmts across instances. */
3921 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3923 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3924 return;
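/* Added note: record NODE as the leader for its set of scalar stmts
   below, so that a later node with the same stmts reuses its vector
   stmts via the lookup above.  */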
3927 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3928 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3929 vect_schedule_slp_instance (child, instance, bst_map);
3931 /* Push SLP node def-type to stmts. */
3932 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3933 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3935 stmt_vec_info child_stmt_info;
3936 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3937 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3940 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3942 /* VECTYPE is the type of the destination. */
3943 vectype = STMT_VINFO_VECTYPE (stmt_info);
3944 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3945 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3947 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3948 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3950 if (dump_enabled_p ())
3951 dump_printf_loc (MSG_NOTE, vect_location,
3952 "------>vectorizing SLP node starting from: %G",
3953 stmt_info->stmt);
3955 /* Vectorized stmts go before the last scalar stmt which is where
3956 all uses are ready. */
3957 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3958 si = gsi_for_stmt (last_stmt_info->stmt);
3960 /* Mark the first element of the reduction chain as reduction to properly
3961 transform the node. In the analysis phase only the last element of the
3962 chain is marked as reduction. */
3963 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3964 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3965 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3967 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3968 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3971 /* Handle two-operation SLP nodes by vectorizing the group with
3972 both operations and then performing a merge. */
3973 if (SLP_TREE_TWO_OPERATORS (node))
3975 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3976 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3977 enum tree_code ocode = ERROR_MARK;
3978 stmt_vec_info ostmt_info;
3979 vec_perm_builder mask (group_size, group_size, 1);
3980 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3982 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3983 if (gimple_assign_rhs_code (ostmt) != code0)
3985 mask.quick_push (1);
3986 ocode = gimple_assign_rhs_code (ostmt);
3988 else
3989 mask.quick_push (0);
3991 if (ocode != ERROR_MARK)
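/* Added note: vectorize the group twice, first as if every stmt used
   CODE0, then with OCODE, and blend the two sets of vector results
   with one VEC_PERM_EXPR per vector statement (see the sketch after
   this function).  */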
3993 vec<stmt_vec_info> v0;
3994 vec<stmt_vec_info> v1;
3995 unsigned j;
3996 tree tmask = NULL_TREE;
3997 vect_transform_stmt (stmt_info, &si, node, instance);
3998 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3999 SLP_TREE_VEC_STMTS (node).truncate (0);
4000 gimple_assign_set_rhs_code (stmt, ocode);
4001 vect_transform_stmt (stmt_info, &si, node, instance);
4002 gimple_assign_set_rhs_code (stmt, code0);
4003 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4004 SLP_TREE_VEC_STMTS (node).truncate (0);
4005 tree meltype = build_nonstandard_integer_type
4006 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4007 tree mvectype = get_same_sized_vectype (meltype, vectype);
4008 unsigned k = 0, l;
4009 for (j = 0; j < v0.length (); ++j)
4011 /* Enforced by vect_build_slp_tree, which rejects variable-length
4012 vectors for SLP_TREE_TWO_OPERATORS. */
4013 unsigned int const_nunits = nunits.to_constant ();
4014 tree_vector_builder melts (mvectype, const_nunits, 1);
4015 for (l = 0; l < const_nunits; ++l)
4017 if (k >= group_size)
4018 k = 0;
4019 tree t = build_int_cst (meltype,
4020 mask[k++] * const_nunits + l);
4021 melts.quick_push (t);
4023 tmask = melts.build ();
4025 /* ??? Not all targets support a VEC_PERM_EXPR with a
4026 constant mask that would translate to a vec_merge RTX
4027 (with their vec_perm_const_ok). We can either not
4028 vectorize in that case or let veclower do its job.
4029 Unfortunately that isn't too great and at least for
4030 plus/minus we'd eventually like to match targets'
4031 vector addsub instructions. */
4032 gimple *vstmt;
4033 vstmt = gimple_build_assign (make_ssa_name (vectype),
4034 VEC_PERM_EXPR,
4035 gimple_assign_lhs (v0[j]->stmt),
4036 gimple_assign_lhs (v1[j]->stmt),
4037 tmask);
4038 SLP_TREE_VEC_STMTS (node).quick_push
4039 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
4041 v0.release ();
4042 v1.release ();
4043 return;
4046 vect_transform_stmt (stmt_info, &si, node, instance);
4048 /* Restore stmt def-types. */
4049 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4050 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4052 stmt_vec_info child_stmt_info;
4053 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4054 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
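/* Illustration only, not part of GCC: how the merge mask above is
   derived for a hypothetical two-operator group {+, -, +, -} with
   four-element vectors.  Selector element l picks lane l of the "+"
   result vector v0, while const_nunits + l picks lane l of the "-"
   result vector v1, giving an addsub-style blend.  */
#if 0
#include <cstdio>

int
main ()
{
  const unsigned group_size = 4;
  const unsigned const_nunits = 4;
  /* 0 = stmt uses the first code (e.g. PLUS_EXPR),
     1 = stmt uses the other code (e.g. MINUS_EXPR), mirroring the
     0/1 values pushed into MASK above.  */
  const unsigned mask[4] = { 0, 1, 0, 1 };

  unsigned k = 0;
  for (unsigned l = 0; l < const_nunits; ++l)
    {
      if (k >= group_size)
        k = 0;
      /* Selector element for lane l of one output vector.  */
      printf ("%u ", mask[k++] * const_nunits + l);
    }
  printf ("\n");   /* prints: 0 5 2 7 */
  return 0;
}
#endif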
4058 /* Replace scalar calls from SLP node NODE with assignments of zero to
4059 their lhs. For loop vectorization this is done in vectorizable_call,
4060 but for SLP it needs to be deferred until the end of vect_schedule_slp,
4061 because multiple SLP instances may refer to the same scalar stmt. */
4063 static void
4064 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4066 gimple *new_stmt;
4067 gimple_stmt_iterator gsi;
4068 int i;
4069 slp_tree child;
4070 tree lhs;
4071 stmt_vec_info stmt_info;
4073 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4074 return;
4076 if (visited.add (node))
4077 return;
4079 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4080 vect_remove_slp_scalar_calls (child, visited);
4082 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4084 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4085 if (!stmt || gimple_bb (stmt) == NULL)
4086 continue;
4087 if (is_pattern_stmt_p (stmt_info)
4088 || !PURE_SLP_STMT (stmt_info))
4089 continue;
4090 lhs = gimple_call_lhs (stmt);
4091 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4092 gsi = gsi_for_stmt (stmt);
4093 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4094 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4098 static void
4099 vect_remove_slp_scalar_calls (slp_tree node)
4101 hash_set<slp_tree> visited;
4102 vect_remove_slp_scalar_calls (node, visited);
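/* Illustration only: assuming a hypothetical vectorized call foo with a
   floating-point lhs, the scalar stmt

     x_1 = foo (a_2, b_3);

   is replaced by

     x_1 = 0.0;

   which keeps x_1 defined while its scalar uses are superseded by the
   vectorized code; the dead assignment is presumably removed by later
   DCE.  */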
4105 /* Generate vector code for all SLP instances in the loop/basic block. */
4107 void
4108 vect_schedule_slp (vec_info *vinfo)
4110 vec<slp_instance> slp_instances;
4111 slp_instance instance;
4112 unsigned int i;
4114 scalar_stmts_to_slp_tree_map_t *bst_map
4115 = new scalar_stmts_to_slp_tree_map_t ();
4116 slp_instances = vinfo->slp_instances;
4117 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4119 /* Schedule the tree of INSTANCE. */
4120 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
4121 instance, bst_map);
4122 if (dump_enabled_p ())
4123 dump_printf_loc (MSG_NOTE, vect_location,
4124 "vectorizing stmts using SLP.\n");
4126 delete bst_map;
4128 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4130 slp_tree root = SLP_INSTANCE_TREE (instance);
4131 stmt_vec_info store_info;
4132 unsigned int j;
4134 /* Remove scalar call stmts. Do not do this for basic-block
4135 vectorization as not all uses may be vectorized.
4136 ??? Why should this be necessary? DCE should be able to
4137 remove the stmts itself.
4138 ??? For BB vectorization we can as well remove scalar
4139 stmts starting from the SLP tree root if they have no
4140 uses. */
4141 if (is_a <loop_vec_info> (vinfo))
4142 vect_remove_slp_scalar_calls (root);
4144 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4145 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4147 if (!STMT_VINFO_DATA_REF (store_info))
4148 break;
4150 store_info = vect_orig_stmt (store_info);
4151 /* Free the attached stmt_vec_info and remove the stmt. */
4152 vinfo->remove_stmt (store_info);