Avoid is_constant calls in vectorizable_bswap
[official-gcc.git] / gcc / tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
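/* (Worked example, not from the original sources: in a chain of consecutive
   accesses DR_GROUP_GAP is 1 for every element after the first, so the place
   returned is simply the stmt's index within the chain; larger gaps bump the
   places of all later elements accordingly.)  */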
int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}

/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
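/* (Worked example, not from the original sources: assuming NELTS == 4, the
   two selectors built below encode { 0, 4, 1, 5 } and { 2, 6, 3, 7 }, i.e.
   the usual interleave-low and interleave-high permutes of two input
   vectors.)  */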
bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   indicates that a swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is a cond and the operands of the comparison need to be
   swapped; *SWAP is 2 if STMT is a cond and the code of the comparison needs
   to be inverted.  If any operands are swapped in this function, *SWAP is
   set to a non-zero value.

   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */
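/* (Worked example, not from the original sources: with *SWAP == 1 the
   cond_expr a < b ? x : y is matched as b > a ? x : y, i.e. the comparison
   operands are swapped; with *SWAP == 2 it is matched as a >= b ? y : x,
   i.e. the comparison is inverted and the arms are swapped.)  */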
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				stmt_info->stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	{
	  unsigned int op = commutative_op + first_op_idx;
	  swap_ssa_operands (stmt_info->stmt,
			     gimple_op_ptr (stmt_info->stmt, op),
			     gimple_op_ptr (stmt_info->stmt, op + 1));
	}
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			    stmt_info->stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
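/* (Worked example, not from the original sources: for the stmt group
   { +, -, +, - } with a 4-element VECTYPE and ALT_STMT_CODE == MINUS_EXPR,
   the selector built below is { 0, 5, 2, 7 }: it blends the all-PLUS vector
   with the all-MINUS vector, the classic addsub pattern.)  */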
static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of its comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of the comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
   swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if ((gimple_call_internal_p (call_stmt)
	       && (!vectorizable_internal_fn_p
		   (gimple_call_internal_fn (call_stmt))))
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt_info->stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i]->stmt, 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt_info->stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When the SLP build fails for these stmts, record that; otherwise the
     SLP build can take exponential time when we are allowed to construct
     parts from scalars, see PR81723.  */
  if (! res)
    {
      vec <stmt_vec_info> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree; *MATCHES records which stmts failed to
   match at the point where a mismatch was found.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (other_info)
		  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
		continue;
	      if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt_info = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child, false);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Do so only if the number of unsuccessful permutes was not
	     more than a cut-off, as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (stmt_info))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j]->stmt, 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
	      for (j = 0; j < group_size; ++j)
		if (matches[j] == !swap_not_matching)
		  {
		    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
		    /* Avoid swapping operands twice.  */
		    if (gimple_plf (stmt, GF_PLF_1))
		      continue;
		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
				       gimple_assign_rhs2_ptr (stmt));
		    gimple_set_plf (stmt, GF_PLF_1, true);
		  }
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
			      == (matches[j] == !swap_not_matching));

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt_info->stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (stmt_info) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  stmt_vec_info stmt_info;
  vec<stmt_vec_info> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    tmp_stmts[permutation[i]] = stmt_info;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
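/* (Worked example, not from the original sources: if every load node of the
   instance carries the same load permutation { 1, 0, 3, 2 }, reordering the
   scalar stmts of all nodes by that permutation makes the loads contiguous,
   so the load permutations can usually be dropped, modulo the gap handling
   below.)  */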
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
	      && DR_GROUP_GAP (first_stmt_info) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (stmt_info)
      && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (stmt_info))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  stmt_vec_info next_load_info = NULL;
	  stmt_vec_info load_info;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      if (j != 0
		  && (next_load_info != load_info
		      || DR_GROUP_GAP (load_info) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (stmt_info)));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}

/* Find the last scalar stmt of SLP node NODE (e.g. the last store of an
   SLP store group), taking pattern stmts into account.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}

/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */
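/* (Worked example, not from the original sources: splitting a store group of
   size 7 with GROUP1_SIZE == 4 yields groups of 4 and 3 stmts; DR_GROUP_GAP
   of the second group becomes the original gap plus 4, and the first group's
   gap grows by 3, so each group still steps over the whole original access
   pattern.)  */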
static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}

/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
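
/* (Worked example, not from the original sources: NUNITS == 4 and
   GROUP_SIZE == 6 give common_multiple == 12 and thus an unrolling factor
   of 2: two copies of the 6-stmt group fill three 4-element vectors
   exactly.)  */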

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   stmt_vec_info stmt_info, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int i;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  vec<stmt_vec_info> scalar_stmts;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (stmt_info);
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = REDUC_GROUP_SIZE (stmt_info);
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  stmt_vec_info next_info = stmt_info;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next_info); i++)
	scalar_stmts.safe_push (next_info);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  bst_fail = new scalar_stmts_set_t ();
  poly_uint64 max_nunits = nunits;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  delete bst_fail;
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node, false);
	      loads.release ();
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / const_max_nunits * const_max_nunits] = false;
1984 vect_free_slp_tree (node, false);
1985 loads.release ();
1987 else
1989 /* Create a new SLP instance. */
1990 new_instance = XNEW (struct _slp_instance);
1991 SLP_INSTANCE_TREE (new_instance) = node;
1992 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1993 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1994 SLP_INSTANCE_LOADS (new_instance) = loads;
1996 /* Compute the load permutation. */
1997 slp_tree load_node;
1998 bool loads_permuted = false;
1999 FOR_EACH_VEC_ELT (loads, i, load_node)
2001 vec<unsigned> load_permutation;
2002 int j;
2003 stmt_vec_info load_info;
2004 bool this_load_permuted = false;
2005 load_permutation.create (group_size);
2006 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2007 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2008 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2010 int load_place = vect_get_place_in_interleaving_chain
2011 (load_info, first_stmt_info);
2012 gcc_assert (load_place != -1);
2013 if (load_place != j)
2014 this_load_permuted = true;
2015 load_permutation.safe_push (load_place);
2017 if (!this_load_permuted
2018 /* The load requires permutation when unrolling exposes
2019 a gap either because the group is larger than the SLP
2020 group-size or because there is a gap between the groups. */
2021 && (known_eq (unrolling_factor, 1U)
2022 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2023 && DR_GROUP_GAP (first_stmt_info) == 0)))
2025 load_permutation.release ();
2026 continue;
2028 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2029 loads_permuted = true;
2032 if (loads_permuted)
2034 if (!vect_supported_load_permutation_p (new_instance))
2036 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2039 "Build SLP failed: unsupported load "
2040 "permutation ");
2041 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2042 TDF_SLIM, stmt_info->stmt, 0);
2044 vect_free_slp_instance (new_instance, false);
2045 return false;
2049 /* If the loads and stores can be handled with load/store-lanes
2050 instructions, do not generate this SLP instance. */
2051 if (is_a <loop_vec_info> (vinfo)
2052 && loads_permuted
2053 && dr && vect_store_lanes_supported (vectype, group_size, false))
2055 slp_tree load_node;
2056 FOR_EACH_VEC_ELT (loads, i, load_node)
2058 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2059 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2060 /* Use SLP for strided accesses (or if we can't load-lanes). */
2061 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2062 || ! vect_load_lanes_supported
2063 (STMT_VINFO_VECTYPE (stmt_vinfo),
2064 DR_GROUP_SIZE (stmt_vinfo), false))
2065 break;
2067 if (i == loads.length ())
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2071 "Built SLP cancelled: can use "
2072 "load/store-lanes\n");
2073 vect_free_slp_instance (new_instance, false);
2074 return false;
2078 vinfo->slp_instances.safe_push (new_instance);
2080 if (dump_enabled_p ())
2082 dump_printf_loc (MSG_NOTE, vect_location,
2083 "Final SLP tree for instance:\n");
2084 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2087 return true;
2090 else
2092 /* Failed to SLP. */
2093 /* Free the allocated memory. */
2094 scalar_stmts.release ();
2095 loads.release ();
2098 /* For basic block SLP, try to break the group up into multiples of the
2099 vector size. */
2100 unsigned HOST_WIDE_INT const_nunits;
2101 if (is_a <bb_vec_info> (vinfo)
2102 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2103 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2104 && nunits.is_constant (&const_nunits))
2106 /* We consider breaking the group only on VF boundaries from the existing
2107 start. */
2108 for (i = 0; i < group_size; i++)
2109 if (!matches[i]) break;
2111 if (i >= const_nunits && i < group_size)
2113 /* Split into two groups at the first vector boundary before i. */
2114 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2115 unsigned group1_size = i & ~(const_nunits - 1);
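	  /* E.g. if the first mismatch is at i == 6 and const_nunits == 4,
	     group1_size becomes 6 & ~3 == 4, i.e. one whole vector. */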
2117 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2118 group1_size);
2119 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2120 max_tree_size);
2121 /* If the first non-match was in the middle of a vector,
2122 skip the rest of that vector. */
2123 if (group1_size < i)
2125 i = group1_size + const_nunits;
2126 if (i < group_size)
2127 rest = vect_split_slp_store_group (rest, const_nunits);
2129 if (i < group_size)
2130 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2131 return res;
2133 /* Even though the first vector did not all match, we might be able to SLP
2134 (some) of the remainder. FORNOW ignore this possibility. */
2137 return false;
2141 /* Check if there are stmts in the loop that can be vectorized using SLP. Build
2142 SLP trees of packed scalar stmts if SLP is possible. */
2144 bool
2145 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2147 unsigned int i;
2148 stmt_vec_info first_element;
2150 DUMP_VECT_SCOPE ("vect_analyze_slp");
2152 /* Find SLP sequences starting from groups of grouped stores. */
2153 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2154 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2156 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2158 if (loop_vinfo->reduction_chains.length () > 0)
2160 /* Find SLP sequences starting from reduction chains. */
2161 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2162 if (! vect_analyze_slp_instance (vinfo, first_element,
2163 max_tree_size))
2165 /* Dissolve reduction chain group. */
2166 stmt_vec_info vinfo = first_element;
2167 while (vinfo)
2169 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2170 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2171 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2172 vinfo = next;
2174 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2178 /* Find SLP sequences starting from groups of reductions. */
2179 if (loop_vinfo->reductions.length () > 1)
2180 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2181 max_tree_size);
2184 return true;
2188 /* For each possible SLP instance decide whether to SLP it and calculate the
2189 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2190 to SLP at least one instance. */
2192 bool
2193 vect_make_slp_decision (loop_vec_info loop_vinfo)
2195 unsigned int i;
2196 poly_uint64 unrolling_factor = 1;
2197 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2198 slp_instance instance;
2199 int decided_to_slp = 0;
2201 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2203 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2205 /* FORNOW: SLP if you can. */
2206 /* All unroll factors have the form current_vector_size * X for some
2207 rational X, so they must have a common multiple. */
2208 unrolling_factor
2209 = force_common_multiple (unrolling_factor,
2210 SLP_INSTANCE_UNROLLING_FACTOR (instance));
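      /* E.g. instances with unrolling factors 2 and 3 force an overall
	 loop unrolling factor of 6, their least common multiple. */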
2212 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2213 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2214 loop-based vectorization. Such stmts will be marked as HYBRID. */
2215 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2216 decided_to_slp++;
2219 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2221 if (decided_to_slp && dump_enabled_p ())
2223 dump_printf_loc (MSG_NOTE, vect_location,
2224 "Decided to SLP %d instances. Unrolling factor ",
2225 decided_to_slp);
2226 dump_dec (MSG_NOTE, unrolling_factor);
2227 dump_printf (MSG_NOTE, "\n");
2230 return (decided_to_slp > 0);
2234 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2235 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2237 static void
2238 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2240 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2241 imm_use_iterator imm_iter;
2242 gimple *use_stmt;
2243 stmt_vec_info use_vinfo;
2244 slp_tree child;
2245 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2246 int j;
2248 /* Propagate hybrid down the SLP tree. */
2249 if (stype == hybrid)
2251 else if (HYBRID_SLP_STMT (stmt_vinfo))
2252 stype = hybrid;
2253 else
2255 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2256 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2257 /* If we get a pattern stmt here we have to use the LHS of the
2258 original stmt for immediate uses. */
2259 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2260 tree def;
2261 if (gimple_code (stmt) == GIMPLE_PHI)
2262 def = gimple_phi_result (stmt);
2263 else
2264 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2265 if (def)
2266 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2268 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2269 if (!use_vinfo)
2270 continue;
2271 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2272 if (!STMT_SLP_TYPE (use_vinfo)
2273 && (STMT_VINFO_RELEVANT (use_vinfo)
2274 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2275 && !(gimple_code (use_stmt) == GIMPLE_PHI
2276 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2278 if (dump_enabled_p ())
2280 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2281 "def in non-SLP stmt: ");
2282 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2284 stype = hybrid;
2289 if (stype == hybrid
2290 && !HYBRID_SLP_STMT (stmt_vinfo))
2292 if (dump_enabled_p ())
2294 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2295 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
2297 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2300 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2301 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2302 vect_detect_hybrid_slp_stmts (child, i, stype);
2305 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2307 static tree
2308 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2310 walk_stmt_info *wi = (walk_stmt_info *)data;
2311 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2313 if (wi->is_lhs)
2314 return NULL_TREE;
2316 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2317 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2319 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2322 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
2324 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2327 return NULL_TREE;
2330 static tree
2331 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2332 walk_stmt_info *wi)
2334 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2335 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2336 /* If the stmt is in a SLP instance then this isn't a reason
2337 to mark use definitions in other SLP instances as hybrid. */
2338 if (! STMT_SLP_TYPE (use_vinfo)
2339 && (STMT_VINFO_RELEVANT (use_vinfo)
2340 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2341 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2342 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2344 else
2345 *handled = true;
2346 return NULL_TREE;
2349 /* Find stmts that must be both vectorized and SLPed. */
2351 void
2352 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2354 unsigned int i;
2355 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2356 slp_instance instance;
2358 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2360 /* First walk all pattern stmts in the loop and mark defs of uses as
2361 hybrid, because immediate uses in pattern stmts are not recorded. */
2362 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2364 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2365 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2366 gsi_next (&gsi))
2368 gimple *stmt = gsi_stmt (gsi);
2369 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2370 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2372 walk_stmt_info wi;
2373 memset (&wi, 0, sizeof (wi));
2374 wi.info = loop_vinfo;
2375 gimple_stmt_iterator gsi2
2376 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2377 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2378 vect_detect_hybrid_slp_1, &wi);
2379 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2380 vect_detect_hybrid_slp_2,
2381 vect_detect_hybrid_slp_1, &wi);
2386 /* Then walk the SLP instance trees marking stmts with uses in
2387 non-SLP stmts as hybrid, also propagating hybrid down the
2388 SLP tree, collecting the above info on-the-fly. */
2389 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2391 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2392 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2393 i, pure_slp);
2398 /* Initialize a bb_vec_info struct for the statements between
2399 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2401 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2402 gimple_stmt_iterator region_end_in,
2403 vec_info_shared *shared)
2404 : vec_info (vec_info::bb, init_cost (NULL), shared),
2405 bb (gsi_bb (region_begin_in)),
2406 region_begin (region_begin_in),
2407 region_end (region_end_in)
2409 gimple_stmt_iterator gsi;
2411 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2412 gsi_next (&gsi))
2414 gimple *stmt = gsi_stmt (gsi);
2415 gimple_set_uid (stmt, 0);
2416 add_stmt (stmt);
2419 bb->aux = this;
2423 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2424 stmts in the basic block. */
2426 _bb_vec_info::~_bb_vec_info ()
2428 for (gimple_stmt_iterator si = region_begin;
2429 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2430 /* Reset region marker. */
2431 gimple_set_uid (gsi_stmt (si), -1);
2433 bb->aux = NULL;
2436 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2437 given that its child nodes have already been processed and that
2438 their def types currently match their SLP node's def type. */
2440 static bool
2441 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2442 slp_instance node_instance,
2443 stmt_vector_for_cost *cost_vec)
2445 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2446 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2448 /* For BB vectorization vector types are assigned here.
2449 Memory accesses already got their vector type assigned
2450 in vect_analyze_data_refs. */
2451 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2452 if (bb_vinfo
2453 && ! STMT_VINFO_DATA_REF (stmt_info))
2455 tree vectype, nunits_vectype;
2456 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2457 &nunits_vectype))
2458 /* We checked this when building the node. */
2459 gcc_unreachable ();
2460 if (vectype == boolean_type_node)
2462 vectype = vect_get_mask_type_for_stmt (stmt_info);
2463 if (!vectype)
2464 /* vect_get_mask_type_for_stmt has already explained the
2465 failure. */
2466 return false;
2469 stmt_vec_info sstmt_info;
2470 unsigned int i;
2471 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2472 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2475 /* Calculate the number of vector statements to be created for the
2476 scalar stmts in this node. For SLP reductions it is equal to the
2477 number of vector statements in the children (which has already been
2478 calculated by the recursive call). Otherwise it is the number of
2479 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2480 VF divided by the number of elements in a vector. */
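   /* For instance, a group size of 4, a vectorization factor of 8 and
      8-element vectors give vect_get_num_vectors (8 * 4, vectype) == 4
      vector statements. */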
2481 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2482 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2483 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2484 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2485 else
2487 poly_uint64 vf;
2488 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2489 vf = loop_vinfo->vectorization_factor;
2490 else
2491 vf = 1;
2492 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2493 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2494 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2495 = vect_get_num_vectors (vf * group_size, vectype);
2498 bool dummy;
2499 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2502 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2503 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2505 Return true if the operations are supported. */
2507 static bool
2508 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2509 slp_instance node_instance,
2510 scalar_stmts_to_slp_tree_map_t *visited,
2511 scalar_stmts_to_slp_tree_map_t *lvisited,
2512 stmt_vector_for_cost *cost_vec)
2514 int i, j;
2515 slp_tree child;
2517 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2518 return true;
2520 /* If we already analyzed the exact same set of scalar stmts we're done.
2521 We share the generated vector stmts for those. */
2522 slp_tree *leader;
2523 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2524 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2526 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2527 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2528 return true;
2531 /* The SLP graph is acyclic, so it is safe not to cache whether we
2532 failed or succeeded: we throw away the lvisited set whenever
2533 we fail. */
2534 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2536 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2537 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2538 visited, lvisited, cost_vec))
2539 return false;
2541 /* Push SLP node def-type to stmt operands. */
2542 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2543 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2544 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2545 = SLP_TREE_DEF_TYPE (child);
2546 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2547 cost_vec);
2548 /* Restore def-types. */
2549 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2550 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2551 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2552 = vect_internal_def;
2553 if (! res)
2554 return false;
2556 return true;
2560 /* Analyze statements in SLP instances of VINFO. Return true if the
2561 operations are supported. */
2563 bool
2564 vect_slp_analyze_operations (vec_info *vinfo)
2566 slp_instance instance;
2567 int i;
2569 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2571 scalar_stmts_to_slp_tree_map_t *visited
2572 = new scalar_stmts_to_slp_tree_map_t ();
2573 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2575 scalar_stmts_to_slp_tree_map_t lvisited;
2576 stmt_vector_for_cost cost_vec;
2577 cost_vec.create (2);
2578 if (!vect_slp_analyze_node_operations (vinfo,
2579 SLP_INSTANCE_TREE (instance),
2580 instance, visited, &lvisited,
2581 &cost_vec))
2583 slp_tree node = SLP_INSTANCE_TREE (instance);
2584 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2585 dump_printf_loc (MSG_NOTE, vect_location,
2586 "removing SLP instance operations starting from: ");
2587 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
2588 vect_free_slp_instance (instance, false);
2589 vinfo->slp_instances.ordered_remove (i);
2590 cost_vec.release ();
2592 else
2594 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2595 x != lvisited.end(); ++x)
2596 visited->put ((*x).first.copy (), (*x).second);
2597 i++;
2599 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2600 cost_vec.release ();
2603 delete visited;
2605 return !vinfo->slp_instances.is_empty ();
2609 /* Compute the scalar cost of the SLP node NODE and its children
2610 and record it in COST_VEC. Do not account defs that are marked
2611 in LIFE and update LIFE according to uses of NODE. */
2613 static void
2614 vect_bb_slp_scalar_cost (basic_block bb,
2615 slp_tree node, vec<bool, va_heap> *life,
2616 stmt_vector_for_cost *cost_vec)
2618 unsigned i;
2619 stmt_vec_info stmt_info;
2620 slp_tree child;
2622 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2624 gimple *stmt = stmt_info->stmt;
2625 vec_info *vinfo = stmt_info->vinfo;
2626 ssa_op_iter op_iter;
2627 def_operand_p def_p;
2629 if ((*life)[i])
2630 continue;
2632 /* If there is a non-vectorized use of the defs then the scalar
2633 stmt is kept live, in which case we do not account it or any
2634 required defs in the SLP children in the scalar cost. This
2635 way we make the vectorization look more costly when compared
2636 to the scalar cost. */
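	  /* A hypothetical example: if an addition in the SLP tree also
	     feeds a use outside the SLP instance, the scalar addition has
	     to stay, so its cost is left out of the scalar baseline that
	     the vector cost must beat. */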
2637 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2639 imm_use_iterator use_iter;
2640 gimple *use_stmt;
2641 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2642 if (!is_gimple_debug (use_stmt))
2644 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2645 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2647 (*life)[i] = true;
2648 BREAK_FROM_IMM_USE_STMT (use_iter);
2652 if ((*life)[i])
2653 continue;
2655 /* Count scalar stmts only once. */
2656 if (gimple_visited_p (stmt))
2657 continue;
2658 gimple_set_visited (stmt, true);
2660 vect_cost_for_stmt kind;
2661 if (STMT_VINFO_DATA_REF (stmt_info))
2663 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2664 kind = scalar_load;
2665 else
2666 kind = scalar_store;
2668 else
2669 kind = scalar_stmt;
2670 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2673 auto_vec<bool, 20> subtree_life;
2674 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2676 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2678 /* Do not directly pass LIFE to the recursive call, copy it to
2679 confine changes in the callee to the current child/subtree. */
2680 subtree_life.safe_splice (*life);
2681 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2682 subtree_life.truncate (0);
2687 /* Check if vectorization of the basic block is profitable. */
2689 static bool
2690 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2692 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2693 slp_instance instance;
2694 int i;
2695 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2696 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2698 /* Calculate scalar cost. */
2699 stmt_vector_for_cost scalar_costs;
2700 scalar_costs.create (0);
2701 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2703 auto_vec<bool, 20> life;
2704 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2705 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2706 SLP_INSTANCE_TREE (instance),
2707 &life, &scalar_costs);
2709 void *target_cost_data = init_cost (NULL);
2710 add_stmt_costs (target_cost_data, &scalar_costs);
2711 scalar_costs.release ();
2712 unsigned dummy;
2713 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2714 destroy_cost_data (target_cost_data);
2716 /* Unset visited flag. */
2717 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2718 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2719 gimple_set_visited (gsi_stmt (gsi), false);
2721 /* Complete the target-specific cost calculation. */
2722 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2723 &vec_inside_cost, &vec_epilogue_cost);
2725 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2727 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2730 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2731 vec_inside_cost);
2732 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2733 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2734 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2737 /* Vectorization is profitable if its cost is no more than the cost of
2738 the scalar version. Note that we err on the vector side for equal
2739 cost because the cost estimate is otherwise quite pessimistic
2740 (constant uses are free on the scalar side but cost a load on the
2741 vector side, for example). */
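  /* E.g. with vec_inside_cost == 4, vec_outside_cost == 0 and
     scalar_cost == 4 the comparison 4 > 4 fails and we keep the
     vectorized version. */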
2742 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2743 return false;
2745 return true;
2748 /* Check if the basic block can be vectorized. Return a bb_vec_info
2749 if so, and set FATAL to true if failure is independent of
2750 current_vector_size. */
2752 static bb_vec_info
2753 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2754 gimple_stmt_iterator region_end,
2755 vec<data_reference_p> datarefs, int n_stmts,
2756 bool &fatal, vec_info_shared *shared)
2758 bb_vec_info bb_vinfo;
2759 slp_instance instance;
2760 int i;
2761 poly_uint64 min_vf = 2;
2763 /* The first group of checks is independent of the vector size. */
2764 fatal = true;
2766 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2768 if (dump_enabled_p ())
2769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2770 "not vectorized: too many instructions in "
2771 "basic block.\n");
2772 free_data_refs (datarefs);
2773 return NULL;
2776 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2777 if (!bb_vinfo)
2778 return NULL;
2780 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2781 bb_vinfo->shared->save_datarefs ();
2783 /* Analyze the data references. */
2785 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2787 if (dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2789 "not vectorized: unhandled data-ref in basic "
2790 "block.\n");
2792 delete bb_vinfo;
2793 return NULL;
2796 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2798 if (dump_enabled_p ())
2799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2800 "not vectorized: not enough data-refs in "
2801 "basic block.\n");
2803 delete bb_vinfo;
2804 return NULL;
2807 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2809 if (dump_enabled_p ())
2810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2811 "not vectorized: unhandled data access in "
2812 "basic block.\n");
2814 delete bb_vinfo;
2815 return NULL;
2818 /* If there are no grouped stores in the region there is no need
2819 to continue with pattern recog as vect_analyze_slp will fail
2820 anyway. */
2821 if (bb_vinfo->grouped_stores.is_empty ())
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2825 "not vectorized: no grouped stores in "
2826 "basic block.\n");
2828 delete bb_vinfo;
2829 return NULL;
2832 /* The rest of the analysis below depends on the vector size in some way, so from here on failure is no longer fatal. */
2833 fatal = false;
2835 vect_pattern_recog (bb_vinfo);
2837 /* Check the SLP opportunities in the basic block, analyze and build SLP
2838 trees. */
2839 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2841 if (dump_enabled_p ())
2843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2844 "Failed to SLP the basic block.\n");
2845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2846 "not vectorized: failed to find SLP opportunities "
2847 "in basic block.\n");
2850 delete bb_vinfo;
2851 return NULL;
2854 vect_record_base_alignments (bb_vinfo);
2856 /* Analyze and verify the alignment of data references and the
2857 dependence in the SLP instances. */
2858 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2860 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2861 || ! vect_slp_analyze_instance_dependence (instance))
2863 slp_tree node = SLP_INSTANCE_TREE (instance);
2864 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2865 dump_printf_loc (MSG_NOTE, vect_location,
2866 "removing SLP instance operations starting from: ");
2867 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
2868 vect_free_slp_instance (instance, false);
2869 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2870 continue;
2873 /* Mark all the statements that we want to vectorize as pure SLP and
2874 relevant. */
2875 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2876 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2878 i++;
2880 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2882 delete bb_vinfo;
2883 return NULL;
2886 if (!vect_slp_analyze_operations (bb_vinfo))
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2890 "not vectorized: bad operation in basic block.\n");
2892 delete bb_vinfo;
2893 return NULL;
2896 /* Cost model: check if the vectorization is worthwhile. */
2897 if (!unlimited_cost_model (NULL)
2898 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2900 if (dump_enabled_p ())
2901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2902 "not vectorized: vectorization is not "
2903 "profitable.\n");
2905 delete bb_vinfo;
2906 return NULL;
2909 if (dump_enabled_p ())
2910 dump_printf_loc (MSG_NOTE, vect_location,
2911 "Basic block will be vectorized using SLP\n");
2913 return bb_vinfo;
2917 /* Main entry for the BB vectorizer. Analyze and transform BB; return
2918 true if anything in the basic block was vectorized. */
2920 bool
2921 vect_slp_bb (basic_block bb)
2923 bb_vec_info bb_vinfo;
2924 gimple_stmt_iterator gsi;
2925 bool any_vectorized = false;
2926 auto_vector_sizes vector_sizes;
2928 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2930 /* Autodetect the first vector size we try. */
2931 current_vector_size = 0;
2932 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2933 unsigned int next_size = 0;
2935 gsi = gsi_start_bb (bb);
2937 poly_uint64 autodetected_vector_size = 0;
2938 while (1)
2940 if (gsi_end_p (gsi))
2941 break;
2943 gimple_stmt_iterator region_begin = gsi;
2944 vec<data_reference_p> datarefs = vNULL;
2945 int insns = 0;
2947 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2949 gimple *stmt = gsi_stmt (gsi);
2950 if (is_gimple_debug (stmt))
2951 continue;
2952 insns++;
2954 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2955 vect_location = stmt;
2957 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
2958 break;
2961 /* Skip leading unhandled stmts. */
2962 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2964 gsi_next (&gsi);
2965 continue;
2968 gimple_stmt_iterator region_end = gsi;
2970 bool vectorized = false;
2971 bool fatal = false;
2972 vec_info_shared shared;
2973 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2974 datarefs, insns, fatal, &shared);
2975 if (bb_vinfo
2976 && dbg_cnt (vect_slp))
2978 if (dump_enabled_p ())
2979 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2981 bb_vinfo->shared->check_datarefs ();
2982 vect_schedule_slp (bb_vinfo);
2984 unsigned HOST_WIDE_INT bytes;
2985 if (current_vector_size.is_constant (&bytes))
2986 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2987 "basic block part vectorized using %wu byte "
2988 "vectors\n", bytes);
2989 else
2990 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2991 "basic block part vectorized using variable "
2992 "length vectors\n");
2994 vectorized = true;
2996 delete bb_vinfo;
2998 any_vectorized |= vectorized;
3000 if (next_size == 0)
3001 autodetected_vector_size = current_vector_size;
3003 if (next_size < vector_sizes.length ()
3004 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3005 next_size += 1;
3007 if (vectorized
3008 || next_size == vector_sizes.length ()
3009 || known_eq (current_vector_size, 0U)
3010 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3011 vector sizes will fail do not bother iterating. */
3012 || fatal)
3014 if (gsi_end_p (region_end))
3015 break;
3017 /* Skip the unhandled stmt. */
3018 gsi_next (&gsi);
3020 /* And reset vector sizes. */
3021 current_vector_size = 0;
3022 next_size = 0;
3024 else
3026 /* Try the next biggest vector size. */
3027 current_vector_size = vector_sizes[next_size++];
3028 if (dump_enabled_p ())
3030 dump_printf_loc (MSG_NOTE, vect_location,
3031 "***** Re-trying analysis with "
3032 "vector size ");
3033 dump_dec (MSG_NOTE, current_vector_size);
3034 dump_printf (MSG_NOTE, "\n");
3037 /* Start over. */
3038 gsi = region_begin;
3042 return any_vectorized;
3046 /* Return true if the vector type of the boolean constant that is the
3047 OPNUM operand in statement STMT_VINFO is a boolean vector. */
3049 static bool
3050 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
3052 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3053 tree op, vectype;
3054 enum vect_def_type dt;
3056 /* For comparisons and COND_EXPR the type is chosen depending
3057 on the other comparison operand. */
3058 if (TREE_CODE_CLASS (code) == tcc_comparison)
3060 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3061 if (opnum)
3062 op = gimple_assign_rhs1 (stmt);
3063 else
3064 op = gimple_assign_rhs2 (stmt);
3066 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3067 gcc_unreachable ();
3069 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3072 if (code == COND_EXPR)
3074 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3075 tree cond = gimple_assign_rhs1 (stmt);
3077 if (TREE_CODE (cond) == SSA_NAME)
3078 op = cond;
3079 else if (opnum)
3080 op = TREE_OPERAND (cond, 1);
3081 else
3082 op = TREE_OPERAND (cond, 0);
3084 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3085 gcc_unreachable ();
3087 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3090 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3093 /* Build a variable-length vector in which the elements in ELTS are repeated
3094 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3095 RESULTS and add any new instructions to SEQ.
3097 The approach we use is:
3099 (1) Find a vector mode VM with integer elements of mode IM.
3101 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3102 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3103 from small vectors to IM.
3105 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3107 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3108 correct byte contents.
3110 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3112 We try to find the largest IM for which this sequence works, in order
3113 to cut down on the number of interleaves. */
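   /* A hypothetical example on a variable-length target: four HImode
      elements {a, b, c, d}. If DImode works as IM with NVECTORS == 1,
      {a, b, c, d} is built as a V4HI, view-converted to a single DI,
      duplicated across a DI vector and view-converted to the requested
      type; no interleaving is needed. If only SImode works
      (NVECTORS == 2), {a, b} and {c, d} each become an SI, each SI is
      duplicated into its own vector, and one interleaving VEC_PERM_EXPR
      pass recreates the {a, b, c, d, a, b, c, d, ...} layout. */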
3115 void
3116 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3117 unsigned int nresults, vec<tree> &results)
3119 unsigned int nelts = elts.length ();
3120 tree element_type = TREE_TYPE (vector_type);
3122 /* (1) Find a vector mode VM with integer elements of mode IM. */
3123 unsigned int nvectors = 1;
3124 tree new_vector_type;
3125 tree permutes[2];
3126 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3127 &nvectors, &new_vector_type,
3128 permutes))
3129 gcc_unreachable ();
3131 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3132 unsigned int partial_nelts = nelts / nvectors;
3133 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3135 tree_vector_builder partial_elts;
3136 auto_vec<tree, 32> pieces (nvectors * 2);
3137 pieces.quick_grow (nvectors * 2);
3138 for (unsigned int i = 0; i < nvectors; ++i)
3140 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3141 ELTS' has mode IM. */
3142 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3143 for (unsigned int j = 0; j < partial_nelts; ++j)
3144 partial_elts.quick_push (elts[i * partial_nelts + j]);
3145 tree t = gimple_build_vector (seq, &partial_elts);
3146 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3147 TREE_TYPE (new_vector_type), t);
3149 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3150 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3153 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3154 correct byte contents.
3156 We need to repeat the following operation log2(nvectors) times:
3158 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3159 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3161 However, if each input repeats every N elements and the VF is
3162 a multiple of N * 2, the HI result is the same as the LO. */
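      /* For instance, with NVECTORS == 4 the first pass interleaves
	 pieces 0/2 and pieces 1/3 (one lo and one hi permute each), and
	 the second pass interleaves those results again, after which
	 each output contains the full NELTS-element sequence. */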
3163 unsigned int in_start = 0;
3164 unsigned int out_start = nvectors;
3165 unsigned int hi_start = nvectors / 2;
3166 /* A bound on the number of outputs needed to produce NRESULTS results
3167 in the final iteration. */
3168 unsigned int noutputs_bound = nvectors * nresults;
3169 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3171 noutputs_bound /= 2;
3172 unsigned int limit = MIN (noutputs_bound, nvectors);
3173 for (unsigned int i = 0; i < limit; ++i)
3175 if ((i & 1) != 0
3176 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3177 2 * in_repeat))
3179 pieces[out_start + i] = pieces[out_start + i - 1];
3180 continue;
3183 tree output = make_ssa_name (new_vector_type);
3184 tree input1 = pieces[in_start + (i / 2)];
3185 tree input2 = pieces[in_start + (i / 2) + hi_start];
3186 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3187 input1, input2,
3188 permutes[i & 1]);
3189 gimple_seq_add_stmt (seq, stmt);
3190 pieces[out_start + i] = output;
3192 std::swap (in_start, out_start);
3195 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3196 results.reserve (nresults);
3197 for (unsigned int i = 0; i < nresults; ++i)
3198 if (i < nvectors)
3199 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3200 pieces[in_start + i]));
3201 else
3202 results.quick_push (results[i - nvectors]);
3206 /* For constant and loop invariant defs of SLP_NODE this function returns
3207 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3208 OP_NUM determines which operand of the RHS of the scalar stmts we are
3209 gathering defs for. NUMBER_OF_VECTORS is the number of vector defs to create. */
3213 static void
3214 vect_get_constant_vectors (tree op, slp_tree slp_node,
3215 vec<tree> *vec_oprnds,
3216 unsigned int op_num, unsigned int number_of_vectors)
3218 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3219 stmt_vec_info stmt_vinfo = stmts[0];
3220 gimple *stmt = stmt_vinfo->stmt;
3221 unsigned HOST_WIDE_INT nunits;
3222 tree vec_cst;
3223 unsigned j, number_of_places_left_in_vector;
3224 tree vector_type;
3225 tree vop;
3226 int group_size = stmts.length ();
3227 unsigned int vec_num, i;
3228 unsigned number_of_copies = 1;
3229 vec<tree> voprnds;
3230 voprnds.create (number_of_vectors);
3231 bool constant_p, is_store;
3232 tree neutral_op = NULL;
3233 enum tree_code code = gimple_expr_code (stmt);
3234 gimple_seq ctor_seq = NULL;
3235 auto_vec<tree, 16> permute_results;
3237 /* Check if vector type is a boolean vector. */
3238 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3239 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3240 vector_type
3241 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3242 else
3243 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3245 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3247 is_store = true;
3248 op = gimple_assign_rhs1 (stmt);
3250 else
3251 is_store = false;
3253 gcc_assert (op);
3255 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3256 created vectors. It is greater than 1 if unrolling is performed.
3258 For example, we have two scalar operands, s1 and s2 (e.g., group of
3259 strided accesses of size two), while NUNITS is four (i.e., four scalars
3260 of this type can be packed in a vector). The output vector will contain
3261 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3262 will be 2).
3264 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3265 containing the operands.
3267 For example, NUNITS is four as before, and the group size is 8
3268 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3269 {s5, s6, s7, s8}. */
3271 /* When using duplicate_and_interleave, we just need one element for
3272 each scalar statement. */
3273 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3274 nunits = group_size;
3276 number_of_copies = nunits * number_of_vectors / group_size;
3278 number_of_places_left_in_vector = nunits;
3279 constant_p = true;
3280 tree_vector_builder elts (vector_type, nunits, 1);
3281 elts.quick_grow (nunits);
3282 bool place_after_defs = false;
3283 for (j = 0; j < number_of_copies; j++)
3285 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3287 stmt = stmt_vinfo->stmt;
3288 if (is_store)
3289 op = gimple_assign_rhs1 (stmt);
3290 else
3292 switch (code)
3294 case COND_EXPR:
3296 tree cond = gimple_assign_rhs1 (stmt);
3297 if (TREE_CODE (cond) == SSA_NAME)
3298 op = gimple_op (stmt, op_num + 1);
3299 else if (op_num == 0 || op_num == 1)
3300 op = TREE_OPERAND (cond, op_num);
3301 else
3303 if (op_num == 2)
3304 op = gimple_assign_rhs2 (stmt);
3305 else
3306 op = gimple_assign_rhs3 (stmt);
3309 break;
3311 case CALL_EXPR:
3312 op = gimple_call_arg (stmt, op_num);
3313 break;
3315 case LSHIFT_EXPR:
3316 case RSHIFT_EXPR:
3317 case LROTATE_EXPR:
3318 case RROTATE_EXPR:
3319 op = gimple_op (stmt, op_num + 1);
3320 /* Unlike the other binary operators, shifts/rotates take
3321 an int shift count instead of an operand of the same
3322 type as the lhs, so make sure the scalar constant is
3323 converted to the right type when we are dealing with
3324 vectors of long long/long/short/char. */
3325 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3326 op = fold_convert (TREE_TYPE (vector_type), op);
3327 break;
3329 default:
3330 op = gimple_op (stmt, op_num + 1);
3331 break;
3335 /* Create 'vect_ = {op0,op1,...,opn}'. */
3336 number_of_places_left_in_vector--;
3337 tree orig_op = op;
3338 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3340 if (CONSTANT_CLASS_P (op))
3342 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3344 /* Can't use VIEW_CONVERT_EXPR for booleans because
3345 of possibly different sizes of scalar value and
3346 vector element. */
3347 if (integer_zerop (op))
3348 op = build_int_cst (TREE_TYPE (vector_type), 0);
3349 else if (integer_onep (op))
3350 op = build_all_ones_cst (TREE_TYPE (vector_type));
3351 else
3352 gcc_unreachable ();
3354 else
3355 op = fold_unary (VIEW_CONVERT_EXPR,
3356 TREE_TYPE (vector_type), op);
3357 gcc_assert (op && CONSTANT_CLASS_P (op));
3359 else
3361 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3362 gimple *init_stmt;
3363 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3365 tree true_val
3366 = build_all_ones_cst (TREE_TYPE (vector_type));
3367 tree false_val
3368 = build_zero_cst (TREE_TYPE (vector_type));
3369 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3370 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3371 op, true_val,
3372 false_val);
3374 else
3376 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3377 op);
3378 init_stmt
3379 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3380 op);
3382 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3383 op = new_temp;
3386 elts[number_of_places_left_in_vector] = op;
3387 if (!CONSTANT_CLASS_P (op))
3388 constant_p = false;
3389 if (TREE_CODE (orig_op) == SSA_NAME
3390 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3391 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3392 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3393 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3394 place_after_defs = true;
3396 if (number_of_places_left_in_vector == 0)
3398 if (constant_p
3399 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3400 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3401 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3402 else
3404 if (vec_oprnds->is_empty ())
3405 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3406 number_of_vectors,
3407 permute_results);
3408 vec_cst = permute_results[number_of_vectors - j - 1];
3410 tree init;
3411 gimple_stmt_iterator gsi;
3412 if (place_after_defs)
3414 stmt_vec_info last_stmt_info
3415 = vect_find_last_scalar_stmt_in_slp (slp_node);
3416 gsi = gsi_for_stmt (last_stmt_info->stmt);
3417 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3418 &gsi);
3420 else
3421 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3422 NULL);
3423 if (ctor_seq != NULL)
3425 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3426 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3427 ctor_seq = NULL;
3429 voprnds.quick_push (init);
3430 place_after_defs = false;
3431 number_of_places_left_in_vector = nunits;
3432 constant_p = true;
3433 elts.new_vector (vector_type, nunits, 1);
3434 elts.quick_grow (nunits);
3439 /* Since the vectors are created in reverse order, we now
3440 invert them. */
3441 vec_num = voprnds.length ();
3442 for (j = vec_num; j != 0; j--)
3444 vop = voprnds[j - 1];
3445 vec_oprnds->quick_push (vop);
3448 voprnds.release ();
3450 /* If VF is greater than the unrolling factor needed for the SLP group
3451 of stmts, the NUMBER_OF_VECTORS to be created is greater than
3452 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we
3453 have to replicate the vectors. */
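  /* E.g. if only two vector defs were built for the SLP group but
     NUMBER_OF_VECTORS is 4 because VF is twice the SLP unrolling
     factor, the loop below pushes the two existing defs again (or a
     neutral vector, when NEUTRAL_OP is set) until four exist. */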
3454 while (number_of_vectors > vec_oprnds->length ())
3456 tree neutral_vec = NULL;
3458 if (neutral_op)
3460 if (!neutral_vec)
3461 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3463 vec_oprnds->quick_push (neutral_vec);
3465 else
3467 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3468 vec_oprnds->quick_push (vop);
3474 /* Get the vectorized definitions from SLP_NODE, which contains the
3475 corresponding vectorized def-stmts. */
3477 static void
3478 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3480 tree vec_oprnd;
3481 stmt_vec_info vec_def_stmt_info;
3482 unsigned int i;
3484 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3486 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3488 gcc_assert (vec_def_stmt_info);
3489 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3490 vec_oprnd = gimple_phi_result (vec_def_phi);
3491 else
3492 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3493 vec_oprnds->quick_push (vec_oprnd);
3498 /* Get vectorized definitions for SLP_NODE.
3499 If the scalar definitions are loop invariants or constants, collect them and
3500 call vect_get_constant_vectors() to create vector stmts.
3501 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3502 must be stored in the corresponding child of SLP_NODE, and we call
3503 vect_get_slp_vect_defs () to retrieve them. */
3505 void
3506 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3507 vec<vec<tree> > *vec_oprnds)
3509 int number_of_vects = 0, i;
3510 unsigned int child_index = 0;
3511 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3512 slp_tree child = NULL;
3513 vec<tree> vec_defs;
3514 tree oprnd;
3515 bool vectorized_defs;
3517 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3518 FOR_EACH_VEC_ELT (ops, i, oprnd)
3520 /* For each operand we check if it has vectorized definitions in a child
3521 node or we need to create them (for invariants and constants). We
3522 check if the LHS of the first stmt of the next child matches OPRND.
3523 If it does, we found the correct child. Otherwise, we call
3524 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
3525 the same child node is checked for the next operand. */
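      /* For example (hypothetical stmt), for a_i = b_i + 1 the defs of
	 operand 0 come from the child node computing b_i, while the
	 constant operand 1 has no matching child and is built by
	 vect_get_constant_vectors instead. */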
3526 vectorized_defs = false;
3527 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3529 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3531 /* We have to check both pattern and original def, if available. */
3532 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3534 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3535 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3536 tree first_def_op;
3538 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3539 first_def_op = gimple_phi_result (first_def);
3540 else
3541 first_def_op = gimple_get_lhs (first_def_info->stmt);
3542 if (operand_equal_p (oprnd, first_def_op, 0)
3543 || (related
3544 && operand_equal_p (oprnd,
3545 gimple_get_lhs (related->stmt), 0)))
3547 /* The number of vector defs is determined by the number of
3548 vector statements in the node from which we get those
3549 statements. */
3550 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3551 vectorized_defs = true;
3552 child_index++;
3555 else
3556 child_index++;
3559 if (!vectorized_defs)
3561 if (i == 0)
3563 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3564 /* Number of vector stmts was calculated according to LHS in
3565 vect_schedule_slp_instance (), fix it by replacing LHS with
3566 RHS, if necessary. See vect_get_smallest_scalar_type () for
3567 details. */
3568 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3569 &rhs_size_unit);
3570 if (rhs_size_unit != lhs_size_unit)
3572 number_of_vects *= rhs_size_unit;
3573 number_of_vects /= lhs_size_unit;
3578 /* Allocate memory for vectorized defs. */
3579 vec_defs = vNULL;
3580 vec_defs.create (number_of_vects);
3582 /* For reduction defs we call vect_get_constant_vectors (), since we are
3583 looking for initial loop invariant values. */
3584 if (vectorized_defs)
3585 /* The defs are already vectorized. */
3586 vect_get_slp_vect_defs (child, &vec_defs);
3587 else
3588 /* Build vectors from scalar defs. */
3589 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3590 number_of_vects);
3592 vec_oprnds->quick_push (vec_defs);
3596 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3597 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3598 permute statements for the SLP node NODE of the SLP instance
3599 SLP_NODE_INSTANCE. */
3601 bool
3602 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3603 gimple_stmt_iterator *gsi, poly_uint64 vf,
3604 slp_instance slp_node_instance, bool analyze_only,
3605 unsigned *n_perms)
3607 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3608 vec_info *vinfo = stmt_info->vinfo;
3609 int vec_index = 0;
3610 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3611 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3612 unsigned int mask_element;
3613 machine_mode mode;
3615 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3616 return false;
3618 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3620 mode = TYPE_MODE (vectype);
3621 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3623 /* Initialize the vect stmts of NODE to properly insert the generated
3624 stmts later. */
3625 if (! analyze_only)
3626 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3627 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3628 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3630 /* Generate permutation masks for every NODE. Number of masks for each NODE
3631 is equal to GROUP_SIZE.
3632 E.g., we have a group of three nodes with three loads from the same
3633 location in each node, and the vector size is 4. I.e., we have an
3634 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3635 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3636 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3639 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3640 The last mask is illegal since we assume two operands for permute
3641 operation, and the mask element values can't be outside that range.
3642 Hence, the last mask must be converted into {2,5,5,5}.
3643 For the first two permutations we need the first and the second input
3644 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3645 we need the second and the third vectors: {b1,c1,a2,b2} and
3646 {c2,a3,b3,c3}. */
3648 int vect_stmts_counter = 0;
3649 unsigned int index = 0;
3650 int first_vec_index = -1;
3651 int second_vec_index = -1;
3652 bool noop_p = true;
3653 *n_perms = 0;
3655 vec_perm_builder mask;
3656 unsigned int nelts_to_build;
3657 unsigned int nvectors_per_build;
3658 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3659 && multiple_p (nunits, group_size));
3660 if (repeating_p)
3662 /* A single vector contains a whole number of copies of the node, so:
3663 (a) all permutes can use the same mask; and
3664 (b) the permutes only need a single vector input. */
3665 mask.new_vector (nunits, group_size, 3);
3666 nelts_to_build = mask.encoded_nelts ();
3667 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3669 else
3671 /* We need to construct a separate mask for each vector statement. */
3672 unsigned HOST_WIDE_INT const_nunits, const_vf;
3673 if (!nunits.is_constant (&const_nunits)
3674 || !vf.is_constant (&const_vf))
3675 return false;
3676 mask.new_vector (const_nunits, const_nunits, 1);
3677 nelts_to_build = const_vf * group_size;
3678 nvectors_per_build = 1;
3681 unsigned int count = mask.encoded_nelts ();
3682 mask.quick_grow (count);
3683 vec_perm_indices indices;
3685 for (unsigned int j = 0; j < nelts_to_build; j++)
3687 unsigned int iter_num = j / group_size;
3688 unsigned int stmt_num = j % group_size;
3689 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3690 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3691 if (repeating_p)
3693 first_vec_index = 0;
3694 mask_element = i;
3696 else
3698 /* Enforced before the loop when !repeating_p. */
3699 unsigned int const_nunits = nunits.to_constant ();
3700 vec_index = i / const_nunits;
3701 mask_element = i % const_nunits;
3702 if (vec_index == first_vec_index
3703 || first_vec_index == -1)
3705 first_vec_index = vec_index;
3707 else if (vec_index == second_vec_index
3708 || second_vec_index == -1)
3710 second_vec_index = vec_index;
3711 mask_element += const_nunits;
3713 else
3715 if (dump_enabled_p ())
3717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3718 "permutation requires at "
3719 "least three vectors ");
3720 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3721 stmt_info->stmt, 0);
3723 gcc_assert (analyze_only);
3724 return false;
3727 gcc_assert (mask_element < 2 * const_nunits);
3730 if (mask_element != index)
3731 noop_p = false;
3732 mask[index++] = mask_element;
3734 if (index == count && !noop_p)
3736 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3737 if (!can_vec_perm_const_p (mode, indices))
3739 if (dump_enabled_p ())
3741 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3742 vect_location,
3743 "unsupported vect permute { ");
3744 for (i = 0; i < count; ++i)
3746 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3747 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3749 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3751 gcc_assert (analyze_only);
3752 return false;
3755 ++*n_perms;
3758 if (index == count)
3760 if (!analyze_only)
3762 tree mask_vec = NULL_TREE;
3764 if (! noop_p)
3765 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3767 if (second_vec_index == -1)
3768 second_vec_index = first_vec_index;
3770 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3772 /* Generate the permute statement if necessary. */
3773 tree first_vec = dr_chain[first_vec_index + ri];
3774 tree second_vec = dr_chain[second_vec_index + ri];
3775 stmt_vec_info perm_stmt_info;
3776 if (! noop_p)
3778 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3779 tree perm_dest
3780 = vect_create_destination_var (gimple_assign_lhs (stmt),
3781 vectype);
3782 perm_dest = make_ssa_name (perm_dest);
3783 gassign *perm_stmt
3784 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3785 first_vec, second_vec,
3786 mask_vec);
3787 perm_stmt_info
3788 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3789 gsi);
3791 else
3792 /* If mask_vec is NULL_TREE, generate the requested
3793 identity transform. */
3794 perm_stmt_info = vinfo->lookup_def (first_vec);
3796 /* Store the vector statement in NODE. */
3797 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3798 = perm_stmt_info;
3802 index = 0;
3803 first_vec_index = -1;
3804 second_vec_index = -1;
3805 noop_p = true;
3809 return true;
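/* What follows is an added illustration, not part of this file: a minimal
   standalone sketch that re-derives the masks from the big comment in
   vect_transform_slp_perm_load above ({0,0,0,3}, {3,3,6,6} and {6,9,9,9}
   rewritten as {2,5,5,5}) using the same vec_index / mask_element
   arithmetic as the function.  It assumes the comment's a0b0c0a1b1c1...
   layout (a's at flattened indices 0, 3, 6, 9) and nunits == 4, and is
   meant to be compiled on its own, e.g. as a hypothetical masks.c.  */

#include <stdio.h>

int
main (void)
{
  const unsigned nunits = 4;	/* Elements per vector, as in the comment.  */
  /* Flattened input indices selecting a0a0a0a1 a1a1a2a2 a2a3a3a3.  */
  const unsigned idx[12] = { 0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9, 9 };

  for (unsigned v = 0; v < 3; v++)
    {
      int first_vec = -1, second_vec = -1;
      printf ("mask %u: {", v);
      for (unsigned l = 0; l < nunits; l++)
	{
	  unsigned i = idx[v * nunits + l];
	  unsigned vec_index = i / nunits;	/* Which input vector.  */
	  unsigned mask_element = i % nunits;	/* Lane within it.  */
	  if (first_vec == -1 || vec_index == (unsigned) first_vec)
	    first_vec = vec_index;
	  else
	    {
	      /* The element comes from the second selected input
		 vector, so bias its lane by nunits.  */
	      second_vec = vec_index;
	      mask_element += nunits;
	    }
	  printf ("%u%s", mask_element, l + 1 < nunits ? "," : "");
	}
      if (second_vec == -1)
	second_vec = first_vec;
      printf ("}  input vectors: %d and %d\n", first_vec, second_vec);
    }
  return 0;
}

/* Expected output of the sketch:
   mask 0: {0,0,0,3}  input vectors: 0 and 0
   mask 1: {3,3,6,6}  input vectors: 0 and 1
   mask 2: {2,5,5,5}  input vectors: 1 and 2  */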
3812 /* Vectorize SLP instance tree in postorder. */
3814 static void
3815 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3816 scalar_stmts_to_slp_tree_map_t *bst_map)
3818 gimple_stmt_iterator si;
3819 stmt_vec_info stmt_info;
3820 unsigned int group_size;
3821 tree vectype;
3822 int i, j;
3823 slp_tree child;
3825 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3826 return;
3828 /* See if we have already vectorized the same set of stmts and reuse their
3829 vectorized stmts. */
3830 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3832 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3833 return;
3836 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3837 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3838 vect_schedule_slp_instance (child, instance, bst_map);
3840 /* Push SLP node def-type to stmts. */
3841 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3842 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3844 stmt_vec_info child_stmt_info;
3845 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3846 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3849 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3851 /* VECTYPE is the type of the destination. */
3852 vectype = STMT_VINFO_VECTYPE (stmt_info);
3853 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3854 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3856 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3857 if (!SLP_TREE_VEC_STMTS (node).exists ())
3858 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3860 if (dump_enabled_p ())
3862 dump_printf_loc (MSG_NOTE, vect_location,
3863 "------>vectorizing SLP node starting from: ");
3864 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
3867 /* Vectorized stmts go before the last scalar stmt, which is where
3868 all uses are ready. */
3869 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3870 si = gsi_for_stmt (last_stmt_info->stmt);
3872 /* Mark the first element of the reduction chain as reduction to properly
3873 transform the node. In the analysis phase only the last element of the
3874 chain is marked as reduction. */
3875 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3876 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3877 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3879 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3880 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3883 /* Handle two-operation SLP nodes by vectorizing the group with
3884 both operations and then performing a merge. */
3885 if (SLP_TREE_TWO_OPERATORS (node))
3887 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3888 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3889 enum tree_code ocode = ERROR_MARK;
3890 stmt_vec_info ostmt_info;
3891 vec_perm_builder mask (group_size, group_size, 1);
3892 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3894 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3895 if (gimple_assign_rhs_code (ostmt) != code0)
3897 mask.quick_push (1);
3898 ocode = gimple_assign_rhs_code (ostmt);
3900 else
3901 mask.quick_push (0);
3903 if (ocode != ERROR_MARK)
3905 vec<stmt_vec_info> v0;
3906 vec<stmt_vec_info> v1;
3907 unsigned j;
3908 tree tmask = NULL_TREE;
3909 vect_transform_stmt (stmt_info, &si, node, instance);
3910 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3911 SLP_TREE_VEC_STMTS (node).truncate (0);
3912 gimple_assign_set_rhs_code (stmt, ocode);
3913 vect_transform_stmt (stmt_info, &si, node, instance);
3914 gimple_assign_set_rhs_code (stmt, code0);
3915 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3916 SLP_TREE_VEC_STMTS (node).truncate (0);
3917 tree meltype = build_nonstandard_integer_type
3918 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3919 tree mvectype = get_same_sized_vectype (meltype, vectype);
3920 unsigned k = 0, l;
3921 for (j = 0; j < v0.length (); ++j)
3923 /* Enforced by vect_build_slp_tree, which rejects variable-length
3924 vectors for SLP_TREE_TWO_OPERATORS. */
3925 unsigned int const_nunits = nunits.to_constant ();
3926 tree_vector_builder melts (mvectype, const_nunits, 1);
3927 for (l = 0; l < const_nunits; ++l)
3929 if (k >= group_size)
3930 k = 0;
3931 tree t = build_int_cst (meltype,
3932 mask[k++] * const_nunits + l);
3933 melts.quick_push (t);
3935 tmask = melts.build ();
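/* A worked example, assuming an alternating MINUS/PLUS group with
   group_size == 2 and const_nunits == 4: mask is {0, 1}, so the loop
   above yields melts == {0, 5, 2, 7}, taking even lanes from the code0
   (v0) vectors and odd lanes from the ocode (v1) vectors; this is the
   blend a target addsub instruction would match.  */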
3937 /* ??? Not all targets support a VEC_PERM_EXPR with a
3938 constant mask that would translate to a vec_merge RTX
3939 (with their vec_perm_const_ok). We can either not
3940 vectorize in that case or let veclower do its job.
3941 Unfortunately that isn't too great and at least for
3942 plus/minus we'd eventually like to match targets'
3943 vector addsub instructions. */
3944 gimple *vstmt;
3945 vstmt = gimple_build_assign (make_ssa_name (vectype),
3946 VEC_PERM_EXPR,
3947 gimple_assign_lhs (v0[j]->stmt),
3948 gimple_assign_lhs (v1[j]->stmt),
3949 tmask);
3950 SLP_TREE_VEC_STMTS (node).quick_push
3951 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
3953 v0.release ();
3954 v1.release ();
3955 return;
3958 vect_transform_stmt (stmt_info, &si, node, instance);
3960 /* Restore stmt def-types. */
3961 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3962 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3964 stmt_vec_info child_stmt_info;
3965 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3966 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
3970 /* Replace the scalar calls in SLP node NODE by setting their lhs to zero.
3971 For loop vectorization this is done in vectorizable_call, but for SLP
3972 it needs to be deferred until the end of vect_schedule_slp, because multiple
3973 SLP instances may refer to the same scalar stmt. */
3975 static void
3976 vect_remove_slp_scalar_calls (slp_tree node)
3978 gimple *new_stmt;
3979 gimple_stmt_iterator gsi;
3980 int i;
3981 slp_tree child;
3982 tree lhs;
3983 stmt_vec_info stmt_info;
3985 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3986 return;
3988 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3989 vect_remove_slp_scalar_calls (child);
3991 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3993 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3994 if (!stmt || gimple_bb (stmt) == NULL)
3995 continue;
3996 if (is_pattern_stmt_p (stmt_info)
3997 || !PURE_SLP_STMT (stmt_info))
3998 continue;
3999 lhs = gimple_call_lhs (stmt);
4000 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4001 gsi = gsi_for_stmt (stmt);
4002 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4003 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4007 /* Generate vector code for all SLP instances in the loop/basic block. */
4009 void
4010 vect_schedule_slp (vec_info *vinfo)
4012 vec<slp_instance> slp_instances;
4013 slp_instance instance;
4014 unsigned int i;
4016 scalar_stmts_to_slp_tree_map_t *bst_map
4017 = new scalar_stmts_to_slp_tree_map_t ();
4018 slp_instances = vinfo->slp_instances;
4019 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4021 /* Schedule the tree of INSTANCE. */
4022 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
4023 instance, bst_map);
4024 if (dump_enabled_p ())
4025 dump_printf_loc (MSG_NOTE, vect_location,
4026 "vectorizing stmts using SLP.\n");
4028 delete bst_map;
4030 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4032 slp_tree root = SLP_INSTANCE_TREE (instance);
4033 stmt_vec_info store_info;
4034 unsigned int j;
4036 /* Remove scalar call stmts. Do not do this for basic-block
4037 vectorization as not all uses may be vectorized.
4038 ??? Why should this be necessary? DCE should be able to
4039 remove the stmts itself.
4040 ??? For BB vectorization we can as well remove scalar
4041 stmts starting from the SLP tree root if they have no
4042 uses. */
4043 if (is_a <loop_vec_info> (vinfo))
4044 vect_remove_slp_scalar_calls (root);
4046 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4047 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4049 if (!STMT_VINFO_DATA_REF (store_info))
4050 break;
4052 store_info = vect_orig_stmt (store_info);
4053 /* Free the attached stmt_vec_info and remove the stmt. */
4054 vinfo->remove_stmt (store_info);