[43/46] Make free_stmt_vec_info take a stmt_vec_info
[official-gcc.git] / gcc / tree-vect-slp.c
blob: 6b6861165b7fdb484f0e6e15e67de5521009d77d
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
54 static void
55 vect_free_slp_tree (slp_tree node, bool final_p)
57 int i;
58 slp_tree child;
60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
61 vect_free_slp_tree (child, final_p);
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
67 if (!final_p)
69 stmt_vec_info stmt_info;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
77 SLP_TREE_CHILDREN (node).release ();
78 SLP_TREE_SCALAR_STMTS (node).release ();
79 SLP_TREE_VEC_STMTS (node).release ();
80 SLP_TREE_LOAD_PERMUTATION (node).release ();
82 free (node);
86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
90 void
91 vect_free_slp_instance (slp_instance instance, bool final_p)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
94 SLP_INSTANCE_LOADS (instance).release ();
95 free (instance);
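/* A typical call pattern (illustrative; the actual call sites live in
   the loop and BB vectorizers):

     vect_free_slp_instance (instance, true);

   with FINAL_P true once the instance has been transformed or finally
   rejected, and false when discarding a tentative build so that
   STMT_VINFO_NUM_SLP_USES is rolled back.  */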
99 /* Create an SLP node for SCALAR_STMTS. */
101 static slp_tree
102 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
104 slp_tree node;
105 stmt_vec_info stmt_info = scalar_stmts[0];
106 unsigned int nops;
108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
116 else if (is_a <gphi *> (stmt_info->stmt))
117 nops = 0;
118 else
119 return NULL;
121 node = XNEW (struct _slp_tree);
122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
123 SLP_TREE_VEC_STMTS (node).create (0);
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
125 SLP_TREE_CHILDREN (node).create (nops);
126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
127 SLP_TREE_TWO_OPERATORS (node) = false;
128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
130 unsigned i;
131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
132 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
134 return node;
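/* Example of how NOPS is derived above: for a COND_EXPR assignment
   such as

     x_1 = a_2 < b_3 ? c_4 : d_5;

   gimple_num_ops is 4, so nops starts at 3 and the COND_EXPR bump
   makes it 4, leaving room for the two comparison operands plus the
   two arms as separate SLP children.  */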
138 /* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
140 node. */
141 typedef struct _slp_oprnd_info
143 /* Def-stmts for the operands. */
144 vec<stmt_vec_info> def_stmts;
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
147 stmt. */
148 tree first_op_type;
149 enum vect_def_type first_dt;
150 bool first_pattern;
151 bool second_pattern;
152 } *slp_oprnd_info;
155 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 operand. */
157 static vec<slp_oprnd_info>
158 vect_create_oprnd_info (int nops, int group_size)
160 int i;
161 slp_oprnd_info oprnd_info;
162 vec<slp_oprnd_info> oprnds_info;
164 oprnds_info.create (nops);
165 for (i = 0; i < nops; i++)
167 oprnd_info = XNEW (struct _slp_oprnd_info);
168 oprnd_info->def_stmts.create (group_size);
169 oprnd_info->first_dt = vect_uninitialized_def;
170 oprnd_info->first_op_type = NULL_TREE;
171 oprnd_info->first_pattern = false;
172 oprnd_info->second_pattern = false;
173 oprnds_info.quick_push (oprnd_info);
176 return oprnds_info;
180 /* Free operands info. */
182 static void
183 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
185 int i;
186 slp_oprnd_info oprnd_info;
188 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
190 oprnd_info->def_stmts.release ();
191 XDELETE (oprnd_info);
194 oprnds_info.release ();
198 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
199 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
200 of the chain. */
202 int
203 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
204 stmt_vec_info first_stmt_info)
206 stmt_vec_info next_stmt_info = first_stmt_info;
207 int result = 0;
209 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
210 return -1;
212 do
214 if (next_stmt_info == stmt_info)
215 return result;
216 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
217 if (next_stmt_info)
218 result += DR_GROUP_GAP (next_stmt_info);
220 while (next_stmt_info);
222 return -1;
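/* For example, given an interleaving chain A -> B -> C where
   DR_GROUP_GAP (B) == 1 and DR_GROUP_GAP (C) == 2, the returned
   places are A:0, B:1 and C:3 -- each step adds the gap of the
   next element, so holes in the group shift later members.  */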
225 /* Check whether it is possible to load COUNT elements of type ELT_MODE
226 using the method implemented by duplicate_and_interleave. Return true
227 if so, returning the number of intermediate vectors in *NVECTORS_OUT
228 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
229 (if nonnull). */
231 bool
232 can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
233 unsigned int *nvectors_out,
234 tree *vector_type_out,
235 tree *permutes)
237 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
238 poly_int64 nelts;
239 unsigned int nvectors = 1;
240 for (;;)
242 scalar_int_mode int_mode;
243 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
244 if (multiple_p (current_vector_size, elt_bytes, &nelts)
245 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
247 tree int_type = build_nonstandard_integer_type
248 (GET_MODE_BITSIZE (int_mode), 1);
249 tree vector_type = build_vector_type (int_type, nelts);
250 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
252 vec_perm_builder sel1 (nelts, 2, 3);
253 vec_perm_builder sel2 (nelts, 2, 3);
254 poly_int64 half_nelts = exact_div (nelts, 2);
255 for (unsigned int i = 0; i < 3; ++i)
257 sel1.quick_push (i);
258 sel1.quick_push (i + nelts);
259 sel2.quick_push (half_nelts + i);
260 sel2.quick_push (half_nelts + i + nelts);
262 vec_perm_indices indices1 (sel1, 2, nelts);
263 vec_perm_indices indices2 (sel2, 2, nelts);
264 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
265 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
267 if (nvectors_out)
268 *nvectors_out = nvectors;
269 if (vector_type_out)
270 *vector_type_out = vector_type;
271 if (permutes)
273 permutes[0] = vect_gen_perm_mask_checked (vector_type,
274 indices1);
275 permutes[1] = vect_gen_perm_mask_checked (vector_type,
276 indices2);
278 return true;
282 if (!multiple_p (elt_bytes, 2, &elt_bytes))
283 return false;
284 nvectors *= 2;
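/* Worked example (assuming a target with 16-byte vectors where V2DI
   permutes are supported): for COUNT == 4 SImode elements, elt_bytes
   starts at 16, which typically does not yield a usable vector mode,
   so it is halved to 8.  Then nelts == 2, the element type is a
   64-bit integer, and the loop succeeds with *NVECTORS_OUT == 2 and
   a V2DI *VECTOR_TYPE_OUT -- i.e. the four 32-bit values are carried
   in two double-width vectors and blended back together by the two
   permutes.  */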
288 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
289 they are of a valid type and that they match the defs of the first stmt of
290 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
291 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
292 indicates swap is required for cond_expr stmts. Specifically, *SWAP
293 is 1 if STMT is cond and operands of comparison need to be swapped;
294 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
295 If there is any operand swap in this function, *SWAP is set to non-zero
296 value.
297 If there was a fatal error return -1; if the error could be corrected by
298 swapping operands of the parent node of this one, return 1; if everything
299 is OK return 0. */
300 static int
301 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
302 vec<stmt_vec_info> stmts, unsigned stmt_num,
303 vec<slp_oprnd_info> *oprnds_info)
305 stmt_vec_info stmt_info = stmts[stmt_num];
306 tree oprnd;
307 unsigned int i, number_of_oprnds;
308 enum vect_def_type dt = vect_uninitialized_def;
309 bool pattern = false;
310 slp_oprnd_info oprnd_info;
311 int first_op_idx = 1;
312 bool commutative = false;
313 bool first_op_cond = false;
314 bool first = stmt_num == 0;
315 bool second = stmt_num == 1;
317 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
319 number_of_oprnds = gimple_call_num_args (stmt);
320 first_op_idx = 3;
322 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
324 enum tree_code code = gimple_assign_rhs_code (stmt);
325 number_of_oprnds = gimple_num_ops (stmt) - 1;
326 /* Swap can only be done for cond_expr if asked to, otherwise we
327 could end up with a different comparison code from the first stmt. */
328 if (gimple_assign_rhs_code (stmt) == COND_EXPR
329 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
331 first_op_cond = true;
332 number_of_oprnds++;
334 else
335 commutative = commutative_tree_code (code);
337 else
338 return -1;
340 bool swapped = (*swap != 0);
341 gcc_assert (!swapped || first_op_cond);
342 for (i = 0; i < number_of_oprnds; i++)
344 again:
345 if (first_op_cond)
347 /* Map indicating how operands of cond_expr should be swapped. */
348 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
349 int *map = maps[*swap];
351 if (i < 2)
352 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
353 first_op_idx), map[i]);
354 else
355 oprnd = gimple_op (stmt_info->stmt, map[i]);
357 else
358 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
360 oprnd_info = (*oprnds_info)[i];
362 stmt_vec_info def_stmt_info;
363 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
365 if (dump_enabled_p ())
367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
368 "Build SLP failed: can't analyze def for ");
369 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
370 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
373 return -1;
376 /* Check if DEF_STMT_INFO is a part of a pattern in LOOP and get
377 the def stmt from the pattern. Check that all the stmts of the
378 node are in the pattern. */
379 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
381 pattern = true;
382 if (!first && !oprnd_info->first_pattern
383 /* Allow different pattern state for the defs of the
384 first stmt in reduction chains. */
385 && (oprnd_info->first_dt != vect_reduction_def
386 || (!second && !oprnd_info->second_pattern)))
388 if (i == 0
389 && !swapped
390 && commutative)
392 swapped = true;
393 goto again;
396 if (dump_enabled_p ())
398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
399 "Build SLP failed: some of the stmts"
400 " are in a pattern, and others are not ");
401 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
402 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
405 return 1;
408 dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
410 if (dt == vect_unknown_def_type)
412 if (dump_enabled_p ())
413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
414 "Unsupported pattern.\n");
415 return -1;
418 switch (gimple_code (def_stmt_info->stmt))
420 case GIMPLE_PHI:
421 case GIMPLE_ASSIGN:
422 break;
424 default:
425 if (dump_enabled_p ())
426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
427 "unsupported defining stmt:\n");
428 return -1;
432 if (second)
433 oprnd_info->second_pattern = pattern;
435 if (first)
437 oprnd_info->first_dt = dt;
438 oprnd_info->first_pattern = pattern;
439 oprnd_info->first_op_type = TREE_TYPE (oprnd);
441 else
443 /* Not first stmt of the group, check that the def-stmt/s match
444 the def-stmt/s of the first stmt. Allow different definition
445 types for reduction chains: the first stmt must be a
446 vect_reduction_def (a phi node), and the rest
447 vect_internal_def. */
448 tree type = TREE_TYPE (oprnd);
449 if ((oprnd_info->first_dt != dt
450 && !(oprnd_info->first_dt == vect_reduction_def
451 && dt == vect_internal_def)
452 && !((oprnd_info->first_dt == vect_external_def
453 || oprnd_info->first_dt == vect_constant_def)
454 && (dt == vect_external_def
455 || dt == vect_constant_def)))
456 || !types_compatible_p (oprnd_info->first_op_type, type))
458 /* Try swapping operands if we got a mismatch. */
459 if (i == 0
460 && !swapped
461 && commutative)
463 swapped = true;
464 goto again;
467 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
469 "Build SLP failed: different types\n");
471 return 1;
473 if ((dt == vect_constant_def
474 || dt == vect_external_def)
475 && !current_vector_size.is_constant ()
476 && (TREE_CODE (type) == BOOLEAN_TYPE
477 || !can_duplicate_and_interleave_p (stmts.length (),
478 TYPE_MODE (type))))
480 if (dump_enabled_p ())
482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
483 "Build SLP failed: invalid type of def "
484 "for variable-length SLP ");
485 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
486 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
488 return -1;
492 /* Check the types of the definitions. */
493 switch (dt)
495 case vect_constant_def:
496 case vect_external_def:
497 break;
499 case vect_reduction_def:
500 case vect_induction_def:
501 case vect_internal_def:
502 oprnd_info->def_stmts.quick_push (def_stmt_info);
503 break;
505 default:
506 /* FORNOW: Not supported. */
507 if (dump_enabled_p ())
509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
510 "Build SLP failed: illegal type of def ");
511 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
512 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
515 return -1;
519 /* Swap operands. */
520 if (swapped)
522 /* If there are already uses of this stmt in a SLP instance then
523 we've committed to the operand order and can't swap it. */
524 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
526 if (dump_enabled_p ())
528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
529 "Build SLP failed: cannot swap operands of "
530 "shared stmt ");
531 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
532 stmt_info->stmt, 0);
534 return -1;
537 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
538 if (first_op_cond)
540 tree cond = gimple_assign_rhs1 (stmt);
541 enum tree_code code = TREE_CODE (cond);
543 /* Swap. */
544 if (*swap == 1)
546 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
547 &TREE_OPERAND (cond, 1));
548 TREE_SET_CODE (cond, swap_tree_comparison (code));
550 /* Invert. */
551 else
553 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
554 gimple_assign_rhs3_ptr (stmt));
555 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
556 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
557 gcc_assert (code != ERROR_MARK);
558 TREE_SET_CODE (cond, code);
561 else
562 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
563 gimple_assign_rhs2_ptr (stmt));
564 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "swapped operands to match def types in ");
568 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
572 *swap = swapped;
573 return 0;
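/* Example of the swap handling above: if the first stmt of the group
   is a_1 = b_2 + c_3 with { internal, external } defs and a later
   stmt is d_4 = e_5 + f_6 with { external, internal } defs, the
   mismatch on operand 0 triggers the "goto again" path with SWAPPED
   set, the operands of the later stmt are re-read in swapped order,
   and on success the stmt itself is rewritten via swap_ssa_operands
   so both stmts expose the same operand order.  */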
576 /* Return true if call statements CALL1 and CALL2 are similar enough
577 to be combined into the same SLP group. */
579 static bool
580 compatible_calls_p (gcall *call1, gcall *call2)
582 unsigned int nargs = gimple_call_num_args (call1);
583 if (nargs != gimple_call_num_args (call2))
584 return false;
586 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
587 return false;
589 if (gimple_call_internal_p (call1))
591 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
592 TREE_TYPE (gimple_call_lhs (call2))))
593 return false;
594 for (unsigned int i = 0; i < nargs; ++i)
595 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
596 TREE_TYPE (gimple_call_arg (call2, i))))
597 return false;
599 else
601 if (!operand_equal_p (gimple_call_fn (call1),
602 gimple_call_fn (call2), 0))
603 return false;
605 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
606 return false;
608 return true;
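/* For instance, two calls to the same internal function such as
   IFN_SQRT with compatible argument and lhs types can be grouped,
   whereas mixing sqrtf and sqrt (different combined function and
   fntype) or calls with differing argument counts cannot.  */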
611 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
612 caller's attempt to find the vector type in STMT_INFO with the narrowest
613 element type. Return true if VECTYPE is nonnull and if it is valid
614 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
615 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
616 vect_build_slp_tree. */
618 static bool
619 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
620 tree vectype, poly_uint64 *max_nunits)
622 if (!vectype)
624 if (dump_enabled_p ())
626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
627 "Build SLP failed: unsupported data-type in ");
628 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
629 stmt_info->stmt, 0);
630 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
632 /* Fatal mismatch. */
633 return false;
636 /* If populating the vector type requires unrolling then fail
637 before adjusting *max_nunits for basic-block vectorization. */
638 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
639 unsigned HOST_WIDE_INT const_nunits;
640 if (STMT_VINFO_BB_VINFO (stmt_info)
641 && (!nunits.is_constant (&const_nunits)
642 || const_nunits > group_size))
644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
645 "Build SLP failed: unrolling required "
646 "in basic block SLP\n");
647 /* Fatal mismatch. */
648 return false;
651 /* In case of multiple types we need to detect the smallest type. */
652 vect_update_max_nunits (max_nunits, vectype);
653 return true;
656 /* STMTS is a group of GROUP_SIZE SLP statements in which some
657 statements do the same operation as the first statement and in which
658 the others do ALT_STMT_CODE. Return true if we can take one vector
659 of the first operation and one vector of the second and permute them
660 to get the required result. VECTYPE is the type of the vector that
661 would be permuted. */
663 static bool
664 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
665 unsigned int group_size, tree vectype,
666 tree_code alt_stmt_code)
668 unsigned HOST_WIDE_INT count;
669 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
670 return false;
672 vec_perm_builder sel (count, count, 1);
673 for (unsigned int i = 0; i < count; ++i)
675 unsigned int elt = i;
676 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
677 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
678 elt += count;
679 sel.quick_push (elt);
681 vec_perm_indices indices (sel, 2, count);
682 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
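/* Worked example: for a four-element vector and the stmt group
   { +, -, +, - }, the selector built above is { 0, 5, 2, 7 } --
   even lanes from the all-plus vector, odd lanes from the all-minus
   vector -- which is exactly the blend an addsub-style instruction
   provides when the target supports that constant permute.  */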
685 /* Verify that the scalar stmts STMTS are isomorphic to each other,
686 do not require any data permutation and use supported operations.
687 Return true if so; otherwise return false and indicate in *MATCHES
688 which stmts are not isomorphic to the first one. If MATCHES[0]
689 is false then this indicates the comparison could not be
690 carried out or the stmts will never be vectorized by SLP.
692 Note COND_EXPR is possibly isomorphic to another one after swapping its
693 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
694 the first stmt by swapping the two operands of comparison; set SWAP[i]
695 to 2 if stmt I is isomorphic to the first stmt by inverting the code
696 of comparison. Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
697 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
699 static bool
700 vect_build_slp_tree_1 (unsigned char *swap,
701 vec<stmt_vec_info> stmts, unsigned int group_size,
702 poly_uint64 *max_nunits, bool *matches,
703 bool *two_operators)
705 unsigned int i;
706 stmt_vec_info first_stmt_info = stmts[0];
707 enum tree_code first_stmt_code = ERROR_MARK;
708 enum tree_code alt_stmt_code = ERROR_MARK;
709 enum tree_code rhs_code = ERROR_MARK;
710 enum tree_code first_cond_code = ERROR_MARK;
711 tree lhs;
712 bool need_same_oprnds = false;
713 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
714 optab optab;
715 int icode;
716 machine_mode optab_op2_mode;
717 machine_mode vec_mode;
718 stmt_vec_info first_load = NULL, prev_first_load = NULL;
720 /* For every stmt in NODE find its def stmt/s. */
721 stmt_vec_info stmt_info;
722 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
724 gimple *stmt = stmt_info->stmt;
725 swap[i] = 0;
726 matches[i] = false;
728 if (dump_enabled_p ())
730 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
731 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
734 /* Fail to vectorize statements marked as unvectorizable. */
735 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
737 if (dump_enabled_p ())
739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
740 "Build SLP failed: unvectorizable statement ");
741 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
743 /* Fatal mismatch. */
744 matches[0] = false;
745 return false;
748 lhs = gimple_get_lhs (stmt);
749 if (lhs == NULL_TREE)
751 if (dump_enabled_p ())
753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
754 "Build SLP failed: not GIMPLE_ASSIGN nor "
755 "GIMPLE_CALL ");
756 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
758 /* Fatal mismatch. */
759 matches[0] = false;
760 return false;
763 tree nunits_vectype;
764 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
765 &nunits_vectype)
766 || (nunits_vectype
767 && !vect_record_max_nunits (stmt_info, group_size,
768 nunits_vectype, max_nunits)))
770 /* Fatal mismatch. */
771 matches[0] = false;
772 return false;
775 gcc_assert (vectype);
777 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
779 rhs_code = CALL_EXPR;
780 if ((gimple_call_internal_p (call_stmt)
781 && (!vectorizable_internal_fn_p
782 (gimple_call_internal_fn (call_stmt))))
783 || gimple_call_tail_p (call_stmt)
784 || gimple_call_noreturn_p (call_stmt)
785 || !gimple_call_nothrow_p (call_stmt)
786 || gimple_call_chain (call_stmt))
788 if (dump_enabled_p ())
790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
791 "Build SLP failed: unsupported call type ");
792 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
793 call_stmt, 0);
795 /* Fatal mismatch. */
796 matches[0] = false;
797 return false;
800 else
801 rhs_code = gimple_assign_rhs_code (stmt);
803 /* Check the operation. */
804 if (i == 0)
806 first_stmt_code = rhs_code;
808 /* Shift arguments should be equal in all the packed stmts for a
809 vector shift with scalar shift operand. */
810 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
811 || rhs_code == LROTATE_EXPR
812 || rhs_code == RROTATE_EXPR)
814 if (vectype == boolean_type_node)
816 if (dump_enabled_p ())
817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
818 "Build SLP failed: shift of a"
819 " boolean.\n");
820 /* Fatal mismatch. */
821 matches[0] = false;
822 return false;
825 vec_mode = TYPE_MODE (vectype);
827 /* First see if we have a vector/vector shift. */
828 optab = optab_for_tree_code (rhs_code, vectype,
829 optab_vector);
831 if (!optab
832 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
834 /* No vector/vector shift, try for a vector/scalar shift. */
835 optab = optab_for_tree_code (rhs_code, vectype,
836 optab_scalar);
838 if (!optab)
840 if (dump_enabled_p ())
841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
842 "Build SLP failed: no optab.\n");
843 /* Fatal mismatch. */
844 matches[0] = false;
845 return false;
847 icode = (int) optab_handler (optab, vec_mode);
848 if (icode == CODE_FOR_nothing)
850 if (dump_enabled_p ())
851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
852 "Build SLP failed: "
853 "op not supported by target.\n");
854 /* Fatal mismatch. */
855 matches[0] = false;
856 return false;
858 optab_op2_mode = insn_data[icode].operand[2].mode;
859 if (!VECTOR_MODE_P (optab_op2_mode))
861 need_same_oprnds = true;
862 first_op1 = gimple_assign_rhs2 (stmt);
866 else if (rhs_code == WIDEN_LSHIFT_EXPR)
868 need_same_oprnds = true;
869 first_op1 = gimple_assign_rhs2 (stmt);
872 else
874 if (first_stmt_code != rhs_code
875 && alt_stmt_code == ERROR_MARK)
876 alt_stmt_code = rhs_code;
877 if (first_stmt_code != rhs_code
878 && (first_stmt_code != IMAGPART_EXPR
879 || rhs_code != REALPART_EXPR)
880 && (first_stmt_code != REALPART_EXPR
881 || rhs_code != IMAGPART_EXPR)
882 /* Handle mismatches in plus/minus by computing both
883 and merging the results. */
884 && !((first_stmt_code == PLUS_EXPR
885 || first_stmt_code == MINUS_EXPR)
886 && (alt_stmt_code == PLUS_EXPR
887 || alt_stmt_code == MINUS_EXPR)
888 && rhs_code == alt_stmt_code)
889 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
890 && (first_stmt_code == ARRAY_REF
891 || first_stmt_code == BIT_FIELD_REF
892 || first_stmt_code == INDIRECT_REF
893 || first_stmt_code == COMPONENT_REF
894 || first_stmt_code == MEM_REF)))
896 if (dump_enabled_p ())
898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
899 "Build SLP failed: different operation "
900 "in stmt ");
901 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
903 "original stmt ");
904 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
905 first_stmt_info->stmt, 0);
907 /* Mismatch. */
908 continue;
911 if (need_same_oprnds
912 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
914 if (dump_enabled_p ())
916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
917 "Build SLP failed: different shift "
918 "arguments in ");
919 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
921 /* Mismatch. */
922 continue;
925 if (rhs_code == CALL_EXPR)
927 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
928 as_a <gcall *> (stmt)))
930 if (dump_enabled_p ())
932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
933 "Build SLP failed: different calls in ");
934 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
935 stmt, 0);
937 /* Mismatch. */
938 continue;
943 /* Grouped store or load. */
944 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
946 if (REFERENCE_CLASS_P (lhs))
948 /* Store. */
951 else
953 /* Load. */
954 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
955 if (prev_first_load)
957 /* Check that there are no loads from different interleaving
958 chains in the same node. */
959 if (prev_first_load != first_load)
961 if (dump_enabled_p ())
963 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
964 vect_location,
965 "Build SLP failed: different "
966 "interleaving chains in one node ");
967 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
968 stmt, 0);
970 /* Mismatch. */
971 continue;
974 else
975 prev_first_load = first_load;
977 } /* Grouped access. */
978 else
980 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
982 /* Not grouped load. */
983 if (dump_enabled_p ())
985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
986 "Build SLP failed: not grouped load ");
987 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
990 /* FORNOW: Not grouped loads are not supported. */
991 /* Fatal mismatch. */
992 matches[0] = false;
993 return false;
996 /* Not memory operation. */
997 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
998 && TREE_CODE_CLASS (rhs_code) != tcc_unary
999 && TREE_CODE_CLASS (rhs_code) != tcc_expression
1000 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
1001 && rhs_code != CALL_EXPR)
1003 if (dump_enabled_p ())
1005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1006 "Build SLP failed: operation");
1007 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
1008 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1010 /* Fatal mismatch. */
1011 matches[0] = false;
1012 return false;
1015 if (rhs_code == COND_EXPR)
1017 tree cond_expr = gimple_assign_rhs1 (stmt);
1018 enum tree_code cond_code = TREE_CODE (cond_expr);
1019 enum tree_code swap_code = ERROR_MARK;
1020 enum tree_code invert_code = ERROR_MARK;
1022 if (i == 0)
1023 first_cond_code = TREE_CODE (cond_expr);
1024 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1026 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1027 swap_code = swap_tree_comparison (cond_code);
1028 invert_code = invert_tree_comparison (cond_code, honor_nans);
1031 if (first_cond_code == cond_code)
1033 /* Isomorphic can be achieved by swapping. */
1034 else if (first_cond_code == swap_code)
1035 swap[i] = 1;
1036 /* Isomorphic can be achieved by inverting. */
1037 else if (first_cond_code == invert_code)
1038 swap[i] = 2;
1039 else
1041 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1044 "Build SLP failed: different"
1045 " operation");
1046 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1047 stmt, 0);
1049 /* Mismatch. */
1050 continue;
1055 matches[i] = true;
1058 for (i = 0; i < group_size; ++i)
1059 if (!matches[i])
1060 return false;
1062 /* If we allowed a two-operation SLP node verify the target can cope
1063 with the permute we are going to use. */
1064 if (alt_stmt_code != ERROR_MARK
1065 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1067 if (vectype == boolean_type_node
1068 || !vect_two_operations_perm_ok_p (stmts, group_size,
1069 vectype, alt_stmt_code))
1071 for (i = 0; i < group_size; ++i)
1072 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1074 matches[i] = false;
1075 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1078 "Build SLP failed: different operation "
1079 "in stmt ");
1080 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1081 stmts[i]->stmt, 0);
1082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1083 "original stmt ");
1084 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1085 first_stmt_info->stmt, 0);
1088 return false;
1090 *two_operators = true;
1093 return true;
1096 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1097 Note we never remove entries except at destruction time, so we do
1098 not need a deleted value that differs from the empty one. */
1099 struct bst_traits
1101 typedef vec <stmt_vec_info> value_type;
1102 typedef vec <stmt_vec_info> compare_type;
1103 static inline hashval_t hash (value_type);
1104 static inline bool equal (value_type existing, value_type candidate);
1105 static inline bool is_empty (value_type x) { return !x.exists (); }
1106 static inline bool is_deleted (value_type x) { return !x.exists (); }
1107 static inline void mark_empty (value_type &x) { x.release (); }
1108 static inline void mark_deleted (value_type &x) { x.release (); }
1109 static inline void remove (value_type &x) { x.release (); }
1111 inline hashval_t
1112 bst_traits::hash (value_type x)
1114 inchash::hash h;
1115 for (unsigned i = 0; i < x.length (); ++i)
1116 h.add_int (gimple_uid (x[i]->stmt));
1117 return h.end ();
1119 inline bool
1120 bst_traits::equal (value_type existing, value_type candidate)
1122 if (existing.length () != candidate.length ())
1123 return false;
1124 for (unsigned i = 0; i < existing.length (); ++i)
1125 if (existing[i] != candidate[i])
1126 return false;
1127 return true;
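/* The traits above key the set by stmt identity, not structure:
   hashing combines the gimple UIDs of the recorded stmts in order,
   so e.g. the stmt vectors { S1, S2 } and { S2, S1 } compare (and in
   general hash) differently, which is what we want when caching
   per-ordering build failures.  */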
1130 typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1131 static scalar_stmts_set_t *bst_fail;
1133 typedef hash_map <vec <gimple *>, slp_tree,
1134 simple_hashmap_traits <bst_traits, slp_tree> >
1135 scalar_stmts_to_slp_tree_map_t;
1137 static slp_tree
1138 vect_build_slp_tree_2 (vec_info *vinfo,
1139 vec<stmt_vec_info> stmts, unsigned int group_size,
1140 poly_uint64 *max_nunits,
1141 vec<slp_tree> *loads,
1142 bool *matches, unsigned *npermutes, unsigned *tree_size,
1143 unsigned max_tree_size);
1145 static slp_tree
1146 vect_build_slp_tree (vec_info *vinfo,
1147 vec<stmt_vec_info> stmts, unsigned int group_size,
1148 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1149 bool *matches, unsigned *npermutes, unsigned *tree_size,
1150 unsigned max_tree_size)
1152 if (bst_fail->contains (stmts))
1153 return NULL;
1154 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1155 loads, matches, npermutes, tree_size,
1156 max_tree_size);
1157 /* When the SLP build fails for these stmts, record that fact:
1158 otherwise the build can take exponential time when we allow
1159 constructing parts from scalars, see PR81723. */
1160 if (! res)
1162 vec <stmt_vec_info> x;
1163 x.create (stmts.length ());
1164 x.splice (stmts);
1165 bst_fail->add (x);
1167 return res;
1170 /* Recursively build an SLP tree for the group of stmts STMTS.
1171 Fail (and return NULL) if the def-stmts are not isomorphic,
1172 require data permutation or are of unsupported types of
1173 operation. Otherwise, return the built SLP tree node. */
1177 static slp_tree
1178 vect_build_slp_tree_2 (vec_info *vinfo,
1179 vec<stmt_vec_info> stmts, unsigned int group_size,
1180 poly_uint64 *max_nunits,
1181 vec<slp_tree> *loads,
1182 bool *matches, unsigned *npermutes, unsigned *tree_size,
1183 unsigned max_tree_size)
1185 unsigned nops, i, this_tree_size = 0;
1186 poly_uint64 this_max_nunits = *max_nunits;
1187 slp_tree node;
1189 matches[0] = false;
1191 stmt_vec_info stmt_info = stmts[0];
1192 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1193 nops = gimple_call_num_args (stmt);
1194 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1196 nops = gimple_num_ops (stmt) - 1;
1197 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1198 nops++;
1200 else if (is_a <gphi *> (stmt_info->stmt))
1201 nops = 0;
1202 else
1203 return NULL;
1205 /* If the SLP node is a PHI (induction or reduction), terminate
1206 the recursion. */
1207 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1209 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1210 tree vectype = get_vectype_for_scalar_type (scalar_type);
1211 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1212 return NULL;
1214 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1215 /* Induction from different IVs is not supported. */
1216 if (def_type == vect_induction_def)
1218 stmt_vec_info other_info;
1219 FOR_EACH_VEC_ELT (stmts, i, other_info)
1220 if (stmt_info != other_info)
1221 return NULL;
1223 else
1225 /* Else def types have to match. */
1226 stmt_vec_info other_info;
1227 FOR_EACH_VEC_ELT (stmts, i, other_info)
1229 /* But for reduction chains only check on the first stmt. */
1230 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1231 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
1232 continue;
1233 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1234 return NULL;
1237 node = vect_create_new_slp_node (stmts);
1238 return node;
1242 bool two_operators = false;
1243 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1244 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1245 &this_max_nunits, matches, &two_operators))
1246 return NULL;
1248 /* If the SLP node is a load, terminate the recursion. */
1249 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1250 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1252 *max_nunits = this_max_nunits;
1253 node = vect_create_new_slp_node (stmts);
1254 loads->safe_push (node);
1255 return node;
1258 /* Get at the operands, verifying they are compatible. */
1259 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1260 slp_oprnd_info oprnd_info;
1261 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1263 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1264 stmts, i, &oprnds_info);
1265 if (res != 0)
1266 matches[(res == -1) ? 0 : i] = false;
1267 if (!matches[0])
1268 break;
1270 for (i = 0; i < group_size; ++i)
1271 if (!matches[i])
1273 vect_free_oprnd_info (oprnds_info);
1274 return NULL;
1277 auto_vec<slp_tree, 4> children;
1278 auto_vec<slp_tree> this_loads;
1280 stmt_info = stmts[0];
1282 if (tree_size)
1283 max_tree_size -= *tree_size;
1285 /* Create SLP_TREE nodes for the definition node/s. */
1286 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1288 slp_tree child;
1289 unsigned old_nloads = this_loads.length ();
1290 unsigned old_tree_size = this_tree_size;
1291 unsigned int j;
1293 if (oprnd_info->first_dt != vect_internal_def
1294 && oprnd_info->first_dt != vect_reduction_def
1295 && oprnd_info->first_dt != vect_induction_def)
1296 continue;
1298 if (++this_tree_size > max_tree_size)
1300 FOR_EACH_VEC_ELT (children, j, child)
1301 vect_free_slp_tree (child, false);
1302 vect_free_oprnd_info (oprnds_info);
1303 return NULL;
1306 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1307 group_size, &this_max_nunits,
1308 &this_loads, matches, npermutes,
1309 &this_tree_size,
1310 max_tree_size)) != NULL)
1312 /* If we have all children of child built up from scalars then just
1313 throw that away and build it up this node from scalars. */
1314 if (!SLP_TREE_CHILDREN (child).is_empty ()
1315 /* ??? Rejecting patterns this way doesn't work. We'd have to
1316 do extra work to cancel the pattern so the uses see the
1317 scalar version. */
1318 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1320 slp_tree grandchild;
1322 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1323 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1324 break;
1325 if (!grandchild)
1327 /* Roll back. */
1328 this_loads.truncate (old_nloads);
1329 this_tree_size = old_tree_size;
1330 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1331 vect_free_slp_tree (grandchild, false);
1332 SLP_TREE_CHILDREN (child).truncate (0);
1334 dump_printf_loc (MSG_NOTE, vect_location,
1335 "Building parent vector operands from "
1336 "scalars instead\n");
1337 oprnd_info->def_stmts = vNULL;
1338 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1339 children.safe_push (child);
1340 continue;
1344 oprnd_info->def_stmts = vNULL;
1345 children.safe_push (child);
1346 continue;
1349 /* If the SLP build failed fatally and we analyze a basic-block
1350 simply treat nodes we fail to build as externally defined
1351 (and thus build vectors from the scalar defs).
1352 The cost model will reject outright expensive cases.
1353 ??? This doesn't treat cases where permutation ultimately
1354 fails (or we don't try permutation below). Ideally we'd
1355 even compute a permutation that will end up with the maximum
1356 SLP tree size... */
1357 if (is_a <bb_vec_info> (vinfo)
1358 && !matches[0]
1359 /* ??? Rejecting patterns this way doesn't work. We'd have to
1360 do extra work to cancel the pattern so the uses see the
1361 scalar version. */
1362 && !is_pattern_stmt_p (stmt_info))
1364 dump_printf_loc (MSG_NOTE, vect_location,
1365 "Building vector operands from scalars\n");
1366 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1367 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1368 children.safe_push (child);
1369 oprnd_info->def_stmts = vNULL;
1370 continue;
1373 /* If the SLP build for operand zero failed and operands zero
1374 and one can be commuted, try that for the scalar stmts
1375 that failed the match. */
1376 if (i == 0
1377 /* A first scalar stmt mismatch signals a fatal mismatch. */
1378 && matches[0]
1379 /* ??? For COND_EXPRs we can swap the comparison operands
1380 as well as the arms under some constraints. */
1381 && nops == 2
1382 && oprnds_info[1]->first_dt == vect_internal_def
1383 && is_gimple_assign (stmt_info->stmt)
1384 /* Do so only if the number of unsuccessful permutes was no more
1385 than a cut-off, as re-trying the recursive match on
1386 possibly each level of the tree would expose exponential
1387 behavior. */
1388 && *npermutes < 4)
1390 /* See whether we can swap the matching or the non-matching
1391 stmt operands. */
1392 bool swap_not_matching = true;
1393 do
1395 for (j = 0; j < group_size; ++j)
1397 if (matches[j] != !swap_not_matching)
1398 continue;
1399 stmt_vec_info stmt_info = stmts[j];
1400 /* Verify if we can swap operands of this stmt. */
1401 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1402 if (!stmt
1403 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1405 if (!swap_not_matching)
1406 goto fail;
1407 swap_not_matching = false;
1408 break;
1410 /* Verify if we can safely swap or if we committed to a
1411 specific operand order already.
1412 ??? Instead of modifying GIMPLE stmts here we could
1413 record whether we want to swap operands in the SLP
1414 node and temporarily do that when processing it
1415 (or wrap operand accessors in a helper). */
1416 else if (swap[j] != 0
1417 || STMT_VINFO_NUM_SLP_USES (stmt_info))
1419 if (!swap_not_matching)
1421 if (dump_enabled_p ())
1423 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1424 vect_location,
1425 "Build SLP failed: cannot swap "
1426 "operands of shared stmt ");
1427 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
1428 TDF_SLIM, stmts[j]->stmt, 0);
1430 goto fail;
1432 swap_not_matching = false;
1433 break;
1437 while (j != group_size);
1439 /* Swap mismatched definition stmts. */
1440 dump_printf_loc (MSG_NOTE, vect_location,
1441 "Re-trying with swapped operands of stmts ");
1442 for (j = 0; j < group_size; ++j)
1443 if (matches[j] == !swap_not_matching)
1445 std::swap (oprnds_info[0]->def_stmts[j],
1446 oprnds_info[1]->def_stmts[j]);
1447 dump_printf (MSG_NOTE, "%d ", j);
1449 dump_printf (MSG_NOTE, "\n");
1450 /* And try again with scratch 'matches' ... */
1451 bool *tem = XALLOCAVEC (bool, group_size);
1452 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1453 group_size, &this_max_nunits,
1454 &this_loads, tem, npermutes,
1455 &this_tree_size,
1456 max_tree_size)) != NULL)
1458 /* ... so if successful we can apply the operand swapping
1459 to the GIMPLE IL. This is necessary because for example
1460 vect_get_slp_defs uses operand indexes and thus expects
1461 canonical operand order. This is also necessary even
1462 if we end up building the operand from scalars as
1463 we'll continue to process swapped operand two. */
1464 for (j = 0; j < group_size; ++j)
1465 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
1466 for (j = 0; j < group_size; ++j)
1467 if (matches[j] == !swap_not_matching)
1469 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1470 /* Avoid swapping operands twice. */
1471 if (gimple_plf (stmt, GF_PLF_1))
1472 continue;
1473 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1474 gimple_assign_rhs2_ptr (stmt));
1475 gimple_set_plf (stmt, GF_PLF_1, true);
1477 /* Verify we swap all duplicates or none. */
1478 if (flag_checking)
1479 for (j = 0; j < group_size; ++j)
1480 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1481 == (matches[j] == !swap_not_matching));
1483 /* If we have all children of child built up from scalars then
1484 just throw that away and build it up this node from scalars. */
1485 if (!SLP_TREE_CHILDREN (child).is_empty ()
1486 /* ??? Rejecting patterns this way doesn't work. We'd have
1487 to do extra work to cancel the pattern so the uses see the
1488 scalar version. */
1489 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1491 unsigned int j;
1492 slp_tree grandchild;
1494 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1495 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1496 break;
1497 if (!grandchild)
1499 /* Roll back. */
1500 this_loads.truncate (old_nloads);
1501 this_tree_size = old_tree_size;
1502 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1503 vect_free_slp_tree (grandchild, false);
1504 SLP_TREE_CHILDREN (child).truncate (0);
1506 dump_printf_loc (MSG_NOTE, vect_location,
1507 "Building parent vector operands from "
1508 "scalars instead\n");
1509 oprnd_info->def_stmts = vNULL;
1510 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1511 children.safe_push (child);
1512 continue;
1516 oprnd_info->def_stmts = vNULL;
1517 children.safe_push (child);
1518 continue;
1521 ++*npermutes;
1524 fail:
1525 gcc_assert (child == NULL);
1526 FOR_EACH_VEC_ELT (children, j, child)
1527 vect_free_slp_tree (child, false);
1528 vect_free_oprnd_info (oprnds_info);
1529 return NULL;
1532 vect_free_oprnd_info (oprnds_info);
1534 if (tree_size)
1535 *tree_size += this_tree_size;
1536 *max_nunits = this_max_nunits;
1537 loads->safe_splice (this_loads);
1539 node = vect_create_new_slp_node (stmts);
1540 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1541 SLP_TREE_CHILDREN (node).splice (children);
1542 return node;
1545 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1547 static void
1548 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1549 slp_tree node)
1551 int i;
1552 stmt_vec_info stmt_info;
1553 slp_tree child;
1555 dump_printf_loc (dump_kind, loc, "node%s\n",
1556 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1557 ? " (external)" : "");
1558 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1560 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1561 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt_info->stmt, 0);
1563 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1564 vect_print_slp_tree (dump_kind, loc, child);
1568 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1569 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1570 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1571 stmts in NODE are to be marked. */
1573 static void
1574 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1576 int i;
1577 stmt_vec_info stmt_info;
1578 slp_tree child;
1580 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1581 return;
1583 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1584 if (j < 0 || i == j)
1585 STMT_SLP_TYPE (stmt_info) = mark;
1587 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1588 vect_mark_slp_stmts (child, mark, j);
1592 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1594 static void
1595 vect_mark_slp_stmts_relevant (slp_tree node)
1597 int i;
1598 stmt_vec_info stmt_info;
1599 slp_tree child;
1601 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1602 return;
1604 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1606 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1607 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1608 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1611 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1612 vect_mark_slp_stmts_relevant (child);
1616 /* Rearrange the statements of NODE according to PERMUTATION. */
1618 static void
1619 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1620 vec<unsigned> permutation)
1622 stmt_vec_info stmt_info;
1623 vec<stmt_vec_info> tmp_stmts;
1624 unsigned int i;
1625 slp_tree child;
1627 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1628 vect_slp_rearrange_stmts (child, group_size, permutation);
1630 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1631 tmp_stmts.create (group_size);
1632 tmp_stmts.quick_grow_cleared (group_size);
1634 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1635 tmp_stmts[permutation[i]] = stmt_info;
1637 SLP_TREE_SCALAR_STMTS (node).release ();
1638 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
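/* Note the direction of the permutation above: tmp_stmts[permutation[i]]
   = stmt_info scatters rather than gathers.  E.g. with scalar stmts
   { A, B, C } and PERMUTATION { 1, 2, 0 } the node ends up with
   { C, A, B }: A moves to slot 1, B to slot 2 and C to slot 0.  */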
1642 /* Attempt to reorder stmts in a reduction chain so that we don't
1643 require any load permutation. Return true if that was possible,
1644 otherwise return false. */
1646 static bool
1647 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1649 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1650 unsigned int i, j;
1651 unsigned int lidx;
1652 slp_tree node, load;
1654 /* Compare all the permutation sequences to the first one. We know
1655 that at least one load is permuted. */
1656 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1657 if (!node->load_permutation.exists ())
1658 return false;
1659 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1661 if (!load->load_permutation.exists ())
1662 return false;
1663 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1664 if (lidx != node->load_permutation[j])
1665 return false;
1668 /* Check that the loads in the first sequence are different and there
1669 are no gaps between them. */
1670 auto_sbitmap load_index (group_size);
1671 bitmap_clear (load_index);
1672 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1674 if (lidx >= group_size)
1675 return false;
1676 if (bitmap_bit_p (load_index, lidx))
1677 return false;
1679 bitmap_set_bit (load_index, lidx);
1681 for (i = 0; i < group_size; i++)
1682 if (!bitmap_bit_p (load_index, i))
1683 return false;
1685 /* This permutation is valid for reduction. Since the order of the
1686 statements in the nodes is not important unless they are memory
1687 accesses, we can rearrange the statements in all the nodes
1688 according to the order of the loads. */
1689 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1690 node->load_permutation);
1692 /* We are done, no actual permutations need to be generated. */
1693 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1694 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1696 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1697 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1698 /* But we have to keep those permutations that are required because
1699 of handling of gaps. */
1700 if (known_eq (unrolling_factor, 1U)
1701 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1702 && DR_GROUP_GAP (first_stmt_info) == 0))
1703 SLP_TREE_LOAD_PERMUTATION (node).release ();
1704 else
1705 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1706 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1709 return true;
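/* For example, if every load node of a reduction instance carries the
   load permutation { 1, 0, 3, 2 }, the checks above pass (all
   sequences match, no duplicate or out-of-range indices, no gaps),
   every node's scalar stmts are rearranged in that order, and the
   permutations can then be dropped -- unless the unrolling factor or
   a gap in the group forces us to keep an identity permutation
   instead.  */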
1712 /* Check if the required load permutations in the SLP instance
1713 SLP_INSTN are supported. */
1715 static bool
1716 vect_supported_load_permutation_p (slp_instance slp_instn)
1718 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1719 unsigned int i, j, k, next;
1720 slp_tree node;
1722 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1725 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1726 if (node->load_permutation.exists ())
1727 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1728 dump_printf (MSG_NOTE, "%d ", next);
1729 else
1730 for (k = 0; k < group_size; ++k)
1731 dump_printf (MSG_NOTE, "%d ", k);
1732 dump_printf (MSG_NOTE, "\n");
1735 /* In case of reduction every load permutation is allowed, since the order
1736 of the reduction statements is not important (as opposed to the case of
1737 grouped stores). The only condition we need to check is that all the
1738 load nodes are of the same size and have the same permutation (and then
1739 rearrange all the nodes of the SLP instance according to this
1740 permutation). */
1742 /* Check that all the load nodes are of the same size. */
1743 /* ??? Can't we assert this? */
1744 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1745 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1746 return false;
1748 node = SLP_INSTANCE_TREE (slp_instn);
1749 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1751 /* Reduction (there are no data-refs in the root).
1752 In reduction chain the order of the loads is not important. */
1753 if (!STMT_VINFO_DATA_REF (stmt_info)
1754 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1755 vect_attempt_slp_rearrange_stmts (slp_instn);
1757 /* In basic block vectorization we allow any subchain of an interleaving
1758 chain.
1759 FORNOW: not supported in loop SLP because of realignment complications. */
1760 if (STMT_VINFO_BB_VINFO (stmt_info))
1762 /* Check whether the loads in an instance form a subchain and thus
1763 no permutation is necessary. */
1764 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1766 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1767 continue;
1768 bool subchain_p = true;
1769 stmt_vec_info next_load_info = NULL;
1770 stmt_vec_info load_info;
1771 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1773 if (j != 0
1774 && (next_load_info != load_info
1775 || DR_GROUP_GAP (load_info) != 1))
1777 subchain_p = false;
1778 break;
1780 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1782 if (subchain_p)
1783 SLP_TREE_LOAD_PERMUTATION (node).release ();
1784 else
1786 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1787 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1788 unsigned HOST_WIDE_INT nunits;
1789 unsigned k, maxk = 0;
1790 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1791 if (k > maxk)
1792 maxk = k;
1793 /* In BB vectorization we may not actually use a loaded vector
1794 accessing elements in excess of DR_GROUP_SIZE. */
1795 tree vectype = STMT_VINFO_VECTYPE (group_info);
1796 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1797 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1800 "BB vectorization with gaps at the end of "
1801 "a load is not supported\n");
1802 return false;
1805 /* Verify the permutation can be generated. */
1806 vec<tree> tem;
1807 unsigned n_perms;
1808 if (!vect_transform_slp_perm_load (node, tem, NULL,
1809 1, slp_instn, true, &n_perms))
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1812 vect_location,
1813 "unsupported load permutation\n");
1814 return false;
1818 return true;
1821 /* For loop vectorization verify we can generate the permutation. Be
1822 conservative about the vectorization factor; there are permutations
1823 that will use three vector inputs only starting from a specific factor,
1824 and the vectorization factor is not yet final.
1825 ??? The SLP instance unrolling factor might not be the maximum one. */
1826 unsigned n_perms;
1827 poly_uint64 test_vf
1828 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1829 LOOP_VINFO_VECT_FACTOR
1830 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1831 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1832 if (node->load_permutation.exists ()
1833 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1834 slp_instn, true, &n_perms))
1835 return false;
1837 return true;
1841 /* Find the last scalar stmt in the SLP tree rooted at NODE. */
1843 stmt_vec_info
1844 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1846 stmt_vec_info last = NULL;
1847 stmt_vec_info stmt_vinfo;
1849 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1851 if (is_pattern_stmt_p (stmt_vinfo))
1852 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1853 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1856 return last;
1859 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1860 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1861 (also containing the first GROUP1_SIZE stmts, since stores are
1862 consecutive), the second containing the remainder.
1863 Return the first stmt in the second group. */
1865 static stmt_vec_info
1866 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1868 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1869 gcc_assert (group1_size > 0);
1870 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1871 gcc_assert (group2_size > 0);
1872 DR_GROUP_SIZE (first_vinfo) = group1_size;
1874 stmt_vec_info stmt_info = first_vinfo;
1875 for (unsigned i = group1_size; i > 1; i--)
1877 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1878 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1880 /* STMT is now the last element of the first group. */
1881 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1882 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1884 DR_GROUP_SIZE (group2) = group2_size;
1885 for (stmt_info = group2; stmt_info;
1886 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1888 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1889 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1892 /* For the second group, the DR_GROUP_GAP is that before the original group,
1893 plus skipping over the first group. */
1894 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1896 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1897 DR_GROUP_GAP (first_vinfo) += group2_size;
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1901 group1_size, group2_size);
1903 return group2;
1906 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1907 statements and a vector of NUNITS elements. */
1909 static poly_uint64
1910 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1912 return exact_div (common_multiple (nunits, group_size), group_size);
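/* Illustrative example (added for exposition): with NUNITS == 4 and
   GROUP_SIZE == 6, common_multiple (4, 6) == 12 and 12 / 6 == 2, i.e.
   the group has to be unrolled twice to fill three vectors exactly.  */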
1915 /* Analyze an SLP instance starting from a group of grouped stores. Call
1916 vect_build_slp_tree to build a tree of packed stmts if possible.
1917 Return FALSE if it's impossible to SLP any stmt in the loop. */
1919 static bool
1920 vect_analyze_slp_instance (vec_info *vinfo,
1921 stmt_vec_info stmt_info, unsigned max_tree_size)
1923 slp_instance new_instance;
1924 slp_tree node;
1925 unsigned int group_size;
1926 tree vectype, scalar_type = NULL_TREE;
1927 unsigned int i;
1928 vec<slp_tree> loads;
1929 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1930 vec<stmt_vec_info> scalar_stmts;
1932 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1934 scalar_type = TREE_TYPE (DR_REF (dr));
1935 vectype = get_vectype_for_scalar_type (scalar_type);
1936 group_size = DR_GROUP_SIZE (stmt_info);
1938 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1940 gcc_assert (is_a <loop_vec_info> (vinfo));
1941 vectype = STMT_VINFO_VECTYPE (stmt_info);
1942 group_size = REDUC_GROUP_SIZE (stmt_info);
1944 else
1946 gcc_assert (is_a <loop_vec_info> (vinfo));
1947 vectype = STMT_VINFO_VECTYPE (stmt_info);
1948 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1951 if (!vectype)
1953 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1956 "Build SLP failed: unsupported data-type ");
1957 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1958 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1961 return false;
1963 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1965 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1966 scalar_stmts.create (group_size);
1967 stmt_vec_info next_info = stmt_info;
1968 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1970 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1971 while (next_info)
1973 if (STMT_VINFO_IN_PATTERN_P (next_info)
1974 && STMT_VINFO_RELATED_STMT (next_info))
1975 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
1976 else
1977 scalar_stmts.safe_push (next_info);
1978 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
1981 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1983 /* Collect the reduction stmts and store them in
1984 SLP_TREE_SCALAR_STMTS. */
1985 while (next_info)
1987 if (STMT_VINFO_IN_PATTERN_P (next_info)
1988 && STMT_VINFO_RELATED_STMT (next_info))
1989 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
1990 else
1991 scalar_stmts.safe_push (next_info);
1992 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
1994 /* Mark the first element of the reduction chain as reduction to properly
1995 transform the node. In the reduction analysis phase only the last
1996 element of the chain is marked as reduction. */
1997 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
1999 else
2001 /* Collect reduction statements. */
2002 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2003 for (i = 0; reductions.iterate (i, &next_info); i++)
2004 scalar_stmts.safe_push (next_info);
2007 loads.create (group_size);
2009 /* Build the tree for the SLP instance. */
2010 bool *matches = XALLOCAVEC (bool, group_size);
2011 unsigned npermutes = 0;
2012 bst_fail = new scalar_stmts_set_t ();
2013 poly_uint64 max_nunits = nunits;
2014 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2015 &max_nunits, &loads, matches, &npermutes,
2016 NULL, max_tree_size);
2017 delete bst_fail;
2018 if (node != NULL)
2020 /* Calculate the unrolling factor based on the smallest type. */
2021 poly_uint64 unrolling_factor
2022 = calculate_unrolling_factor (max_nunits, group_size);
2024 if (maybe_ne (unrolling_factor, 1U)
2025 && is_a <bb_vec_info> (vinfo))
2027 unsigned HOST_WIDE_INT const_max_nunits;
2028 if (!max_nunits.is_constant (&const_max_nunits)
2029 || const_max_nunits > group_size)
2031 if (dump_enabled_p ())
2032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2033 "Build SLP failed: store group "
2034 "size not a multiple of the vector size "
2035 "in basic block SLP\n");
2036 vect_free_slp_tree (node, false);
2037 loads.release ();
2038 return false;
2040 /* Fatal mismatch. */
2041 matches[group_size / const_max_nunits * const_max_nunits] = false;
2042 vect_free_slp_tree (node, false);
2043 loads.release ();
2045 else
2047 /* Create a new SLP instance. */
2048 new_instance = XNEW (struct _slp_instance);
2049 SLP_INSTANCE_TREE (new_instance) = node;
2050 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2051 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2052 SLP_INSTANCE_LOADS (new_instance) = loads;
2054 /* Compute the load permutation. */
2055 slp_tree load_node;
2056 bool loads_permuted = false;
2057 FOR_EACH_VEC_ELT (loads, i, load_node)
2059 vec<unsigned> load_permutation;
2060 int j;
2061 stmt_vec_info load_info;
2062 bool this_load_permuted = false;
2063 load_permutation.create (group_size);
2064 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2065 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2066 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2068 int load_place = vect_get_place_in_interleaving_chain
2069 (load_info, first_stmt_info);
2070 gcc_assert (load_place != -1);
2071 if (load_place != j)
2072 this_load_permuted = true;
2073 load_permutation.safe_push (load_place);
2075 if (!this_load_permuted
2076 /* The load requires permutation when unrolling exposes
2077 a gap either because the group is larger than the SLP
2078 group-size or because there is a gap between the groups. */
2079 && (known_eq (unrolling_factor, 1U)
2080 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2081 && DR_GROUP_GAP (first_stmt_info) == 0)))
2083 load_permutation.release ();
2084 continue;
2086 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2087 loads_permuted = true;
2090 if (loads_permuted)
2092 if (!vect_supported_load_permutation_p (new_instance))
2094 if (dump_enabled_p ())
2096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2097 "Build SLP failed: unsupported load "
2098 "permutation ");
2099 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2100 TDF_SLIM, stmt_info->stmt, 0);
2102 vect_free_slp_instance (new_instance, false);
2103 return false;
2107 /* If the loads and stores can be handled with load/store-lanes
2108 instructions, do not generate this SLP instance. */
2109 if (is_a <loop_vec_info> (vinfo)
2110 && loads_permuted
2111 && dr && vect_store_lanes_supported (vectype, group_size, false))
2113 slp_tree load_node;
2114 FOR_EACH_VEC_ELT (loads, i, load_node)
2116 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2117 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2118 /* Use SLP for strided accesses (or if we can't load-lanes). */
2119 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2120 || ! vect_load_lanes_supported
2121 (STMT_VINFO_VECTYPE (stmt_vinfo),
2122 DR_GROUP_SIZE (stmt_vinfo), false))
2123 break;
2125 if (i == loads.length ())
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129 "Built SLP cancelled: can use "
2130 "load/store-lanes\n");
2131 vect_free_slp_instance (new_instance, false);
2132 return false;
2136 vinfo->slp_instances.safe_push (new_instance);
2138 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_NOTE, vect_location,
2141 "Final SLP tree for instance:\n");
2142 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2145 return true;
2148 else
2150 /* Failed to SLP. */
2151 /* Free the allocated memory. */
2152 scalar_stmts.release ();
2153 loads.release ();
2156 /* For basic block SLP, try to break the group up into multiples of the
2157 vector size. */
2158 unsigned HOST_WIDE_INT const_nunits;
2159 if (is_a <bb_vec_info> (vinfo)
2160 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2161 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2162 && nunits.is_constant (&const_nunits))
2164 /* We consider breaking the group only on VF boundaries from the existing
2165 start. */
2166 for (i = 0; i < group_size; i++)
2167 if (!matches[i]) break;
2169 if (i >= const_nunits && i < group_size)
2171 /* Split into two groups at the first vector boundary before i. */
2172 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2173 unsigned group1_size = i & ~(const_nunits - 1);
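	  /* Illustrative example (added for exposition): with
	     CONST_NUNITS == 4 and the first mismatch at i == 6,
	     GROUP1_SIZE becomes 6 & ~3 == 4, i.e. the split happens at
	     the vector boundary below the mismatch.  */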
2175 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2176 group1_size);
2177 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2178 max_tree_size);
2179 /* If the first non-match was in the middle of a vector,
2180 skip the rest of that vector. */
2181 if (group1_size < i)
2183 i = group1_size + const_nunits;
2184 if (i < group_size)
2185 rest = vect_split_slp_store_group (rest, const_nunits);
2187 if (i < group_size)
2188 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2189 return res;
2191 /* Even though the first vector did not all match, we might be able to SLP
2192 (some of) the remainder. FORNOW ignore this possibility. */
2195 return false;
2199 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2200 trees of packed scalar stmts if SLP is possible. */
2202 bool
2203 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2205 unsigned int i;
2206 stmt_vec_info first_element;
2208 DUMP_VECT_SCOPE ("vect_analyze_slp");
2210 /* Find SLP sequences starting from groups of grouped stores. */
2211 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2212 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2214 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2216 if (loop_vinfo->reduction_chains.length () > 0)
2218 /* Find SLP sequences starting from reduction chains. */
2219 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2220 if (! vect_analyze_slp_instance (vinfo, first_element,
2221 max_tree_size))
2223 /* Dissolve reduction chain group. */
2224 stmt_vec_info vinfo = first_element;
2225 while (vinfo)
2227 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2228 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2229 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2230 vinfo = next;
2232 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2236 /* Find SLP sequences starting from groups of reductions. */
2237 if (loop_vinfo->reductions.length () > 1)
2238 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2239 max_tree_size);
2242 return true;
2246 /* For each possible SLP instance decide whether to SLP it and calculate the
2247 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2248 to SLP at least one instance. */
2250 bool
2251 vect_make_slp_decision (loop_vec_info loop_vinfo)
2253 unsigned int i;
2254 poly_uint64 unrolling_factor = 1;
2255 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2256 slp_instance instance;
2257 int decided_to_slp = 0;
2259 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2261 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2263 /* FORNOW: SLP if you can. */
2264 /* All unroll factors have the form current_vector_size * X for some
2265 rational X, so they must have a common multiple. */
2266 unrolling_factor
2267 = force_common_multiple (unrolling_factor,
2268 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2270 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2271 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2272 loop-based vectorization. Such stmts will be marked as HYBRID. */
2273 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2274 decided_to_slp++;
2277 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2279 if (decided_to_slp && dump_enabled_p ())
2281 dump_printf_loc (MSG_NOTE, vect_location,
2282 "Decided to SLP %d instances. Unrolling factor ",
2283 decided_to_slp);
2284 dump_dec (MSG_NOTE, unrolling_factor);
2285 dump_printf (MSG_NOTE, "\n");
2288 return (decided_to_slp > 0);
2292 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2293 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2295 static void
2296 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2298 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2299 imm_use_iterator imm_iter;
2300 gimple *use_stmt;
2301 stmt_vec_info use_vinfo;
2302 slp_tree child;
2303 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2304 int j;
2306 /* Propagate hybrid down the SLP tree. */
2307 if (stype == hybrid)
2309 else if (HYBRID_SLP_STMT (stmt_vinfo))
2310 stype = hybrid;
2311 else
2313 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2314 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2315 /* If we get a pattern stmt here we have to use the LHS of the
2316 original stmt for immediate uses. */
2317 gimple *stmt = stmt_vinfo->stmt;
2318 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2319 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2320 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo)->stmt;
2321 tree def;
2322 if (gimple_code (stmt) == GIMPLE_PHI)
2323 def = gimple_phi_result (stmt);
2324 else
2325 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2326 if (def)
2327 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2329 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2330 if (!use_vinfo)
2331 continue;
2332 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2333 && STMT_VINFO_RELATED_STMT (use_vinfo))
2334 use_vinfo = STMT_VINFO_RELATED_STMT (use_vinfo);
2335 if (!STMT_SLP_TYPE (use_vinfo)
2336 && (STMT_VINFO_RELEVANT (use_vinfo)
2337 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2338 && !(gimple_code (use_stmt) == GIMPLE_PHI
2339 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2341 if (dump_enabled_p ())
2343 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2344 "def in non-SLP stmt: ");
2345 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2347 stype = hybrid;
2352 if (stype == hybrid
2353 && !HYBRID_SLP_STMT (stmt_vinfo))
2355 if (dump_enabled_p ())
2357 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2358 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
2360 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2363 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2364 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2365 vect_detect_hybrid_slp_stmts (child, i, stype);
2368 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2370 static tree
2371 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2373 walk_stmt_info *wi = (walk_stmt_info *)data;
2374 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2376 if (wi->is_lhs)
2377 return NULL_TREE;
2379 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2380 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2382 if (dump_enabled_p ())
2384 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2385 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
2387 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2390 return NULL_TREE;
2393 static tree
2394 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2395 walk_stmt_info *wi)
2397 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2398 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2399 /* If the stmt is in a SLP instance then this isn't a reason
2400 to mark use definitions in other SLP instances as hybrid. */
2401 if (! STMT_SLP_TYPE (use_vinfo)
2402 && (STMT_VINFO_RELEVANT (use_vinfo)
2403 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2404 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2405 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2407 else
2408 *handled = true;
2409 return NULL_TREE;
2412 /* Find stmts that must be both vectorized and SLPed. */
2414 void
2415 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2417 unsigned int i;
2418 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2419 slp_instance instance;
2421 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2423 /* First walk all pattern stmts in the loop and mark defs of uses as
2424 hybrid because immediate uses in them are not recorded. */
2425 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2427 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2428 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2429 gsi_next (&gsi))
2431 gimple *stmt = gsi_stmt (gsi);
2432 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2433 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2435 walk_stmt_info wi;
2436 memset (&wi, 0, sizeof (wi));
2437 wi.info = loop_vinfo;
2438 gimple_stmt_iterator gsi2
2439 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2440 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2441 vect_detect_hybrid_slp_1, &wi);
2442 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2443 vect_detect_hybrid_slp_2,
2444 vect_detect_hybrid_slp_1, &wi);
2449 /* Then walk the SLP instance trees marking stmts with uses in
2450 non-SLP stmts as hybrid, also propagating hybrid down the
2451 SLP tree, collecting the above info on-the-fly. */
2452 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2454 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2455 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2456 i, pure_slp);
2461 /* Initialize a bb_vec_info struct for the statements between
2462 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2464 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2465 gimple_stmt_iterator region_end_in,
2466 vec_info_shared *shared)
2467 : vec_info (vec_info::bb, init_cost (NULL), shared),
2468 bb (gsi_bb (region_begin_in)),
2469 region_begin (region_begin_in),
2470 region_end (region_end_in)
2472 gimple_stmt_iterator gsi;
2474 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2475 gsi_next (&gsi))
2477 gimple *stmt = gsi_stmt (gsi);
2478 gimple_set_uid (stmt, 0);
2479 add_stmt (stmt);
2482 bb->aux = this;
2486 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2487 stmts in the basic block. */
2489 _bb_vec_info::~_bb_vec_info ()
2491 for (gimple_stmt_iterator si = region_begin;
2492 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2493 /* Reset region marker. */
2494 gimple_set_uid (gsi_stmt (si), -1);
2496 bb->aux = NULL;
2499 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2500 given that its child nodes have already been processed, and that
2501 their def types currently match their SLP node's def type. */
2503 static bool
2504 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2505 slp_instance node_instance,
2506 stmt_vector_for_cost *cost_vec)
2508 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2509 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2511 /* For BB vectorization vector types are assigned here.
2512 Memory accesses already got their vector type assigned
2513 in vect_analyze_data_refs. */
2514 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2515 if (bb_vinfo
2516 && ! STMT_VINFO_DATA_REF (stmt_info))
2518 tree vectype, nunits_vectype;
2519 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2520 &nunits_vectype))
2521 /* We checked this when building the node. */
2522 gcc_unreachable ();
2523 if (vectype == boolean_type_node)
2525 vectype = vect_get_mask_type_for_stmt (stmt_info);
2526 if (!vectype)
2527 /* vect_get_mask_type_for_stmt has already explained the
2528 failure. */
2529 return false;
2532 stmt_vec_info sstmt_info;
2533 unsigned int i;
2534 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2535 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2538 /* Calculate the number of vector statements to be created for the
2539 scalar stmts in this node. For SLP reductions it is equal to the
2540 number of vector statements in the children (which has already been
2541 calculated by the recursive call). Otherwise it is the number of
2542 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2543 VF divided by the number of elements in a vector. */
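  /* Illustrative example (added for exposition): a store group with
     SLP_INSTANCE_GROUP_SIZE == 4, VF == 8 and 4-element vectors needs
     8 * 4 / 4 == 8 vector stmts.  */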
2544 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2545 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2546 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2547 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2548 else
2550 poly_uint64 vf;
2551 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2552 vf = loop_vinfo->vectorization_factor;
2553 else
2554 vf = 1;
2555 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2556 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2557 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2558 = vect_get_num_vectors (vf * group_size, vectype);
2561 bool dummy;
2562 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2565 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2566 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2568 Return true if the operations are supported. */
2570 static bool
2571 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2572 slp_instance node_instance,
2573 scalar_stmts_to_slp_tree_map_t *visited,
2574 scalar_stmts_to_slp_tree_map_t *lvisited,
2575 stmt_vector_for_cost *cost_vec)
2577 int i, j;
2578 slp_tree child;
2580 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2581 return true;
2583 /* If we already analyzed the exact same set of scalar stmts we're done.
2584 We share the generated vector stmts for those. */
2585 slp_tree *leader;
2586 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2587 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2589 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2590 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2591 return true;
2594 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2595 doesn't result in any issue since we throw away the lvisited set
2596 when we fail. */
2597 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2599 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2600 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2601 visited, lvisited, cost_vec))
2602 return false;
2604 /* Push SLP node def-type to stmt operands. */
2605 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2606 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2607 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2608 = SLP_TREE_DEF_TYPE (child);
2609 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2610 cost_vec);
2611 /* Restore def-types. */
2612 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2613 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2614 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2615 = vect_internal_def;
2616 if (! res)
2617 return false;
2619 return true;
2623 /* Analyze statements in SLP instances of VINFO. Return true if the
2624 operations are supported. */
2626 bool
2627 vect_slp_analyze_operations (vec_info *vinfo)
2629 slp_instance instance;
2630 int i;
2632 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2634 scalar_stmts_to_slp_tree_map_t *visited
2635 = new scalar_stmts_to_slp_tree_map_t ();
2636 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2638 scalar_stmts_to_slp_tree_map_t lvisited;
2639 stmt_vector_for_cost cost_vec;
2640 cost_vec.create (2);
2641 if (!vect_slp_analyze_node_operations (vinfo,
2642 SLP_INSTANCE_TREE (instance),
2643 instance, visited, &lvisited,
2644 &cost_vec))
2646 slp_tree node = SLP_INSTANCE_TREE (instance);
2647 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2648 dump_printf_loc (MSG_NOTE, vect_location,
2649 "removing SLP instance operations starting from: ");
2650 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
2651 vect_free_slp_instance (instance, false);
2652 vinfo->slp_instances.ordered_remove (i);
2653 cost_vec.release ();
2655 else
2657 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2658 x != lvisited.end(); ++x)
2659 visited->put ((*x).first.copy (), (*x).second);
2660 i++;
2662 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2663 cost_vec.release ();
2666 delete visited;
2668 return !vinfo->slp_instances.is_empty ();
2672 /* Compute the scalar cost of the SLP node NODE and its children,
2673 recording it in COST_VEC. Do not account defs that are marked in LIFE and
2674 update LIFE according to uses of NODE. */
2676 static void
2677 vect_bb_slp_scalar_cost (basic_block bb,
2678 slp_tree node, vec<bool, va_heap> *life,
2679 stmt_vector_for_cost *cost_vec)
2681 unsigned i;
2682 stmt_vec_info stmt_info;
2683 slp_tree child;
2685 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2687 gimple *stmt = stmt_info->stmt;
2688 vec_info *vinfo = stmt_info->vinfo;
2689 ssa_op_iter op_iter;
2690 def_operand_p def_p;
2692 if ((*life)[i])
2693 continue;
2695 /* If there is a non-vectorized use of the defs then the scalar
2696 stmt is kept live in which case we do not account it or any
2697 required defs in the SLP children in the scalar cost. This
2698 way we make the vectorization more costly when compared to
2699 the scalar cost. */
2700 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2702 imm_use_iterator use_iter;
2703 gimple *use_stmt;
2704 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2705 if (!is_gimple_debug (use_stmt))
2707 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2708 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2710 (*life)[i] = true;
2711 BREAK_FROM_IMM_USE_STMT (use_iter);
2715 if ((*life)[i])
2716 continue;
2718 /* Count scalar stmts only once. */
2719 if (gimple_visited_p (stmt))
2720 continue;
2721 gimple_set_visited (stmt, true);
2723 vect_cost_for_stmt kind;
2724 if (STMT_VINFO_DATA_REF (stmt_info))
2726 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2727 kind = scalar_load;
2728 else
2729 kind = scalar_store;
2731 else
2732 kind = scalar_stmt;
2733 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2736 auto_vec<bool, 20> subtree_life;
2737 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2739 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2741 /* Do not directly pass LIFE to the recursive call, copy it to
2742 confine changes in the callee to the current child/subtree. */
2743 subtree_life.safe_splice (*life);
2744 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2745 subtree_life.truncate (0);
2750 /* Check if vectorization of the basic block is profitable. */
2752 static bool
2753 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2755 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2756 slp_instance instance;
2757 int i;
2758 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2759 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2761 /* Calculate scalar cost. */
2762 stmt_vector_for_cost scalar_costs;
2763 scalar_costs.create (0);
2764 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2766 auto_vec<bool, 20> life;
2767 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2768 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2769 SLP_INSTANCE_TREE (instance),
2770 &life, &scalar_costs);
2772 void *target_cost_data = init_cost (NULL);
2773 add_stmt_costs (target_cost_data, &scalar_costs);
2774 scalar_costs.release ();
2775 unsigned dummy;
2776 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2777 destroy_cost_data (target_cost_data);
2779 /* Unset visited flag. */
2780 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2781 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2782 gimple_set_visited (gsi_stmt (gsi), false);
2784 /* Complete the target-specific cost calculation. */
2785 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2786 &vec_inside_cost, &vec_epilogue_cost);
2788 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2790 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2793 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2794 vec_inside_cost);
2795 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2796 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2797 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2800 /* Vectorization is profitable if its cost is no more than the cost of the
2801 scalar version. Note that we err on the vector side for equal cost because
2802 the cost estimate is otherwise quite pessimistic (constant uses are
2803 free on the scalar side but cost a load on the vector side for
2804 example). */
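  /* Illustrative example (added for exposition): with a scalar cost of 10,
     a vector inside cost of 6 and an outside (prologue plus epilogue) cost
     of 4 the two sides are equal; 10 > 10 is false, so we vectorize.  */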
2805 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2806 return false;
2808 return true;
2811 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2812 if so, and sets FATAL to true if failure is independent of
2813 current_vector_size. */
2815 static bb_vec_info
2816 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2817 gimple_stmt_iterator region_end,
2818 vec<data_reference_p> datarefs, int n_stmts,
2819 bool &fatal, vec_info_shared *shared)
2821 bb_vec_info bb_vinfo;
2822 slp_instance instance;
2823 int i;
2824 poly_uint64 min_vf = 2;
2826 /* The first group of checks is independent of the vector size. */
2827 fatal = true;
2829 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2831 if (dump_enabled_p ())
2832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2833 "not vectorized: too many instructions in "
2834 "basic block.\n");
2835 free_data_refs (datarefs);
2836 return NULL;
2839 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2840 if (!bb_vinfo)
2841 return NULL;
2843 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2844 bb_vinfo->shared->save_datarefs ();
2846 /* Analyze the data references. */
2848 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2852 "not vectorized: unhandled data-ref in basic "
2853 "block.\n");
2855 delete bb_vinfo;
2856 return NULL;
2859 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2861 if (dump_enabled_p ())
2862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2863 "not vectorized: not enough data-refs in "
2864 "basic block.\n");
2866 delete bb_vinfo;
2867 return NULL;
2870 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2872 if (dump_enabled_p ())
2873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2874 "not vectorized: unhandled data access in "
2875 "basic block.\n");
2877 delete bb_vinfo;
2878 return NULL;
2881 /* If there are no grouped stores in the region there is no need
2882 to continue with pattern recog as vect_analyze_slp will fail
2883 anyway. */
2884 if (bb_vinfo->grouped_stores.is_empty ())
2886 if (dump_enabled_p ())
2887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2888 "not vectorized: no grouped stores in "
2889 "basic block.\n");
2891 delete bb_vinfo;
2892 return NULL;
2895 /* The rest of the analysis below depends on the vector size in some way; failures from here on are no longer fatal. */
2896 fatal = false;
2898 vect_pattern_recog (bb_vinfo);
2900 /* Check the SLP opportunities in the basic block, analyze and build SLP
2901 trees. */
2902 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2904 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2907 "Failed to SLP the basic block.\n");
2908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2909 "not vectorized: failed to find SLP opportunities "
2910 "in basic block.\n");
2913 delete bb_vinfo;
2914 return NULL;
2917 vect_record_base_alignments (bb_vinfo);
2919 /* Analyze and verify the alignment of data references and the
2920 dependence in the SLP instances. */
2921 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2923 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2924 || ! vect_slp_analyze_instance_dependence (instance))
2926 slp_tree node = SLP_INSTANCE_TREE (instance);
2927 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2928 dump_printf_loc (MSG_NOTE, vect_location,
2929 "removing SLP instance operations starting from: ");
2930 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
2931 vect_free_slp_instance (instance, false);
2932 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2933 continue;
2936 /* Mark all the statements that we want to vectorize as pure SLP and
2937 relevant. */
2938 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2939 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2941 i++;
2943 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2945 delete bb_vinfo;
2946 return NULL;
2949 if (!vect_slp_analyze_operations (bb_vinfo))
2951 if (dump_enabled_p ())
2952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2953 "not vectorized: bad operation in basic block.\n");
2955 delete bb_vinfo;
2956 return NULL;
2959 /* Cost model: check if the vectorization is worthwhile. */
2960 if (!unlimited_cost_model (NULL)
2961 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2963 if (dump_enabled_p ())
2964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2965 "not vectorized: vectorization is not "
2966 "profitable.\n");
2968 delete bb_vinfo;
2969 return NULL;
2972 if (dump_enabled_p ())
2973 dump_printf_loc (MSG_NOTE, vect_location,
2974 "Basic block will be vectorized using SLP\n");
2976 return bb_vinfo;
2980 /* Main entry for the BB vectorizer. Analyze and transform BB, returning
2981 true if anything in the basic-block was vectorized. */
2983 bool
2984 vect_slp_bb (basic_block bb)
2986 bb_vec_info bb_vinfo;
2987 gimple_stmt_iterator gsi;
2988 bool any_vectorized = false;
2989 auto_vector_sizes vector_sizes;
2991 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2993 /* Autodetect first vector size we try. */
2994 current_vector_size = 0;
2995 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2996 unsigned int next_size = 0;
2998 gsi = gsi_start_bb (bb);
3000 poly_uint64 autodetected_vector_size = 0;
3001 while (1)
3003 if (gsi_end_p (gsi))
3004 break;
3006 gimple_stmt_iterator region_begin = gsi;
3007 vec<data_reference_p> datarefs = vNULL;
3008 int insns = 0;
3010 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3012 gimple *stmt = gsi_stmt (gsi);
3013 if (is_gimple_debug (stmt))
3014 continue;
3015 insns++;
3017 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3018 vect_location = stmt;
3020 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3021 break;
3024 /* Skip leading unhandled stmts. */
3025 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3027 gsi_next (&gsi);
3028 continue;
3031 gimple_stmt_iterator region_end = gsi;
3033 bool vectorized = false;
3034 bool fatal = false;
3035 vec_info_shared shared;
3036 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3037 datarefs, insns, fatal, &shared);
3038 if (bb_vinfo
3039 && dbg_cnt (vect_slp))
3041 if (dump_enabled_p ())
3042 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3044 bb_vinfo->shared->check_datarefs ();
3045 vect_schedule_slp (bb_vinfo);
3047 unsigned HOST_WIDE_INT bytes;
3048 if (current_vector_size.is_constant (&bytes))
3049 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3050 "basic block part vectorized using "
3051 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3052 "vectors\n", bytes);
3053 else
3054 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3055 "basic block part vectorized using variable "
3056 "length vectors\n");
3058 vectorized = true;
3060 delete bb_vinfo;
3062 any_vectorized |= vectorized;
3064 if (next_size == 0)
3065 autodetected_vector_size = current_vector_size;
3067 if (next_size < vector_sizes.length ()
3068 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3069 next_size += 1;
3071 if (vectorized
3072 || next_size == vector_sizes.length ()
3073 || known_eq (current_vector_size, 0U)
3074 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3075 vector sizes will fail do not bother iterating. */
3076 || fatal)
3078 if (gsi_end_p (region_end))
3079 break;
3081 /* Skip the unhandled stmt. */
3082 gsi_next (&gsi);
3084 /* And reset vector sizes. */
3085 current_vector_size = 0;
3086 next_size = 0;
3088 else
3090 /* Try the next biggest vector size. */
3091 current_vector_size = vector_sizes[next_size++];
3092 if (dump_enabled_p ())
3094 dump_printf_loc (MSG_NOTE, vect_location,
3095 "***** Re-trying analysis with "
3096 "vector size ");
3097 dump_dec (MSG_NOTE, current_vector_size);
3098 dump_printf (MSG_NOTE, "\n");
3101 /* Start over. */
3102 gsi = region_begin;
3106 return any_vectorized;
3110 /* Return true if the vector type of the boolean constant that is the OPNUM
3111 operand of statement STMT_VINFO is a boolean vector. */
3113 static bool
3114 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
3116 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3117 tree op, vectype;
3118 enum vect_def_type dt;
3120 /* For comparisons and COND_EXPRs the type is chosen depending
3121 on the other comparison operand. */
3122 if (TREE_CODE_CLASS (code) == tcc_comparison)
3124 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3125 if (opnum)
3126 op = gimple_assign_rhs1 (stmt);
3127 else
3128 op = gimple_assign_rhs2 (stmt);
3130 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3131 gcc_unreachable ();
3133 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3136 if (code == COND_EXPR)
3138 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3139 tree cond = gimple_assign_rhs1 (stmt);
3141 if (TREE_CODE (cond) == SSA_NAME)
3142 op = cond;
3143 else if (opnum)
3144 op = TREE_OPERAND (cond, 1);
3145 else
3146 op = TREE_OPERAND (cond, 0);
3148 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3149 gcc_unreachable ();
3151 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3154 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3157 /* Build a variable-length vector in which the elements in ELTS are repeated
3158 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3159 RESULTS and add any new instructions to SEQ.
3161 The approach we use is:
3163 (1) Find a vector mode VM with integer elements of mode IM.
3165 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3166 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3167 from small vectors to IM.
3169 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3171 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3172 correct byte contents.
3174 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3176 We try to find the largest IM for which this sequence works, in order
3177 to cut down on the number of interleaves. */
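/* Illustrative example (added for exposition): for ELTS == {a, b, c, d}
   of QImode with IM == HImode, step (2) forms the NELTS' == 2 values
   "ab" and "cd", step (3) duplicates them into {ab, ab, ...} and
   {cd, cd, ...}, and a single interleaving VEC_PERM_EXPR in step (4)
   yields {ab, cd, ab, cd, ...}, whose byte contents repeat
   {a, b, c, d} as required.  */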
3179 void
3180 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3181 unsigned int nresults, vec<tree> &results)
3183 unsigned int nelts = elts.length ();
3184 tree element_type = TREE_TYPE (vector_type);
3186 /* (1) Find a vector mode VM with integer elements of mode IM. */
3187 unsigned int nvectors = 1;
3188 tree new_vector_type;
3189 tree permutes[2];
3190 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3191 &nvectors, &new_vector_type,
3192 permutes))
3193 gcc_unreachable ();
3195 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3196 unsigned int partial_nelts = nelts / nvectors;
3197 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3199 tree_vector_builder partial_elts;
3200 auto_vec<tree, 32> pieces (nvectors * 2);
3201 pieces.quick_grow (nvectors * 2);
3202 for (unsigned int i = 0; i < nvectors; ++i)
3204 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3205 ELTS' has mode IM. */
3206 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3207 for (unsigned int j = 0; j < partial_nelts; ++j)
3208 partial_elts.quick_push (elts[i * partial_nelts + j]);
3209 tree t = gimple_build_vector (seq, &partial_elts);
3210 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3211 TREE_TYPE (new_vector_type), t);
3213 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3214 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3217 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3218 correct byte contents.
3220 We need to repeat the following operation log2(nvectors) times:
3222 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3223 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3225 However, if each input repeats every N elements and the VF is
3226 a multiple of N * 2, the HI result is the same as the LO. */
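  /* Illustrative example (added for exposition): with NVECTORS == 4 the
     loop below runs log2 (4) == 2 times, producing up to NVECTORS
     outputs per round while IN_START and OUT_START swap between the two
     halves of PIECES.  */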
3227 unsigned int in_start = 0;
3228 unsigned int out_start = nvectors;
3229 unsigned int hi_start = nvectors / 2;
3230 /* A bound on the number of outputs needed to produce NRESULTS results
3231 in the final iteration. */
3232 unsigned int noutputs_bound = nvectors * nresults;
3233 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3235 noutputs_bound /= 2;
3236 unsigned int limit = MIN (noutputs_bound, nvectors);
3237 for (unsigned int i = 0; i < limit; ++i)
3239 if ((i & 1) != 0
3240 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3241 2 * in_repeat))
3243 pieces[out_start + i] = pieces[out_start + i - 1];
3244 continue;
3247 tree output = make_ssa_name (new_vector_type);
3248 tree input1 = pieces[in_start + (i / 2)];
3249 tree input2 = pieces[in_start + (i / 2) + hi_start];
3250 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3251 input1, input2,
3252 permutes[i & 1]);
3253 gimple_seq_add_stmt (seq, stmt);
3254 pieces[out_start + i] = output;
3256 std::swap (in_start, out_start);
3259 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3260 results.reserve (nresults);
3261 for (unsigned int i = 0; i < nresults; ++i)
3262 if (i < nvectors)
3263 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3264 pieces[in_start + i]));
3265 else
3266 results.quick_push (results[i - nvectors]);
3270 /* For constant and loop invariant defs of SLP_NODE this function returns
3271 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3272 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3273 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3277 static void
3278 vect_get_constant_vectors (tree op, slp_tree slp_node,
3279 vec<tree> *vec_oprnds,
3280 unsigned int op_num, unsigned int number_of_vectors)
3282 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3283 stmt_vec_info stmt_vinfo = stmts[0];
3284 gimple *stmt = stmt_vinfo->stmt;
3285 unsigned HOST_WIDE_INT nunits;
3286 tree vec_cst;
3287 unsigned j, number_of_places_left_in_vector;
3288 tree vector_type;
3289 tree vop;
3290 int group_size = stmts.length ();
3291 unsigned int vec_num, i;
3292 unsigned number_of_copies = 1;
3293 vec<tree> voprnds;
3294 voprnds.create (number_of_vectors);
3295 bool constant_p, is_store;
3296 tree neutral_op = NULL;
3297 enum tree_code code = gimple_expr_code (stmt);
3298 gimple_seq ctor_seq = NULL;
3299 auto_vec<tree, 16> permute_results;
3301 /* Check if vector type is a boolean vector. */
3302 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3303 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3304 vector_type
3305 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3306 else
3307 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3309 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3311 is_store = true;
3312 op = gimple_assign_rhs1 (stmt);
3314 else
3315 is_store = false;
3317 gcc_assert (op);
3319 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3320 created vectors. It is greater than 1 if unrolling is performed.
3322 For example, we have two scalar operands, s1 and s2 (e.g., group of
3323 strided accesses of size two), while NUNITS is four (i.e., four scalars
3324 of this type can be packed in a vector). The output vector will contain
3325 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3326 will be 2).
3328 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3329 containing the operands.
3331 For example, NUNITS is four as before, and the group size is 8
3332 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3333 {s5, s6, s7, s8}. */
3335 /* When using duplicate_and_interleave, we just need one element for
3336 each scalar statement. */
3337 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3338 nunits = group_size;
3340 number_of_copies = nunits * number_of_vectors / group_size;
3342 number_of_places_left_in_vector = nunits;
3343 constant_p = true;
3344 tree_vector_builder elts (vector_type, nunits, 1);
3345 elts.quick_grow (nunits);
3346 bool place_after_defs = false;
3347 for (j = 0; j < number_of_copies; j++)
3349 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3351 stmt = stmt_vinfo->stmt;
3352 if (is_store)
3353 op = gimple_assign_rhs1 (stmt);
3354 else
3356 switch (code)
3358 case COND_EXPR:
3360 tree cond = gimple_assign_rhs1 (stmt);
3361 if (TREE_CODE (cond) == SSA_NAME)
3362 op = gimple_op (stmt, op_num + 1);
3363 else if (op_num == 0 || op_num == 1)
3364 op = TREE_OPERAND (cond, op_num);
3365 else
3367 if (op_num == 2)
3368 op = gimple_assign_rhs2 (stmt);
3369 else
3370 op = gimple_assign_rhs3 (stmt);
3373 break;
3375 case CALL_EXPR:
3376 op = gimple_call_arg (stmt, op_num);
3377 break;
3379 case LSHIFT_EXPR:
3380 case RSHIFT_EXPR:
3381 case LROTATE_EXPR:
3382 case RROTATE_EXPR:
3383 op = gimple_op (stmt, op_num + 1);
3384 /* Unlike the other binary operators, shifts/rotates have
3385 the shift count being int, instead of the same type as
3386 the lhs, so make sure the scalar is the right type if
3387 we are dealing with vectors of
3388 long long/long/short/char. */
3389 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3390 op = fold_convert (TREE_TYPE (vector_type), op);
3391 break;
3393 default:
3394 op = gimple_op (stmt, op_num + 1);
3395 break;
3399 /* Create 'vect_ = {op0,op1,...,opn}'. */
3400 number_of_places_left_in_vector--;
3401 tree orig_op = op;
3402 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3404 if (CONSTANT_CLASS_P (op))
3406 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3408 /* Can't use VIEW_CONVERT_EXPR for booleans because
3409 of possibly different sizes of scalar value and
3410 vector element. */
3411 if (integer_zerop (op))
3412 op = build_int_cst (TREE_TYPE (vector_type), 0);
3413 else if (integer_onep (op))
3414 op = build_all_ones_cst (TREE_TYPE (vector_type));
3415 else
3416 gcc_unreachable ();
3418 else
3419 op = fold_unary (VIEW_CONVERT_EXPR,
3420 TREE_TYPE (vector_type), op);
3421 gcc_assert (op && CONSTANT_CLASS_P (op));
3423 else
3425 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3426 gimple *init_stmt;
3427 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3429 tree true_val
3430 = build_all_ones_cst (TREE_TYPE (vector_type));
3431 tree false_val
3432 = build_zero_cst (TREE_TYPE (vector_type));
3433 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3434 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3435 op, true_val,
3436 false_val);
3438 else
3440 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3441 op);
3442 init_stmt
3443 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3444 op);
3446 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3447 op = new_temp;
3450 elts[number_of_places_left_in_vector] = op;
3451 if (!CONSTANT_CLASS_P (op))
3452 constant_p = false;
3453 if (TREE_CODE (orig_op) == SSA_NAME
3454 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3455 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3456 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3457 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3458 place_after_defs = true;
3460 if (number_of_places_left_in_vector == 0)
3462 if (constant_p
3463 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3464 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3465 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3466 else
3468 if (vec_oprnds->is_empty ())
3469 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3470 number_of_vectors,
3471 permute_results);
3472 vec_cst = permute_results[number_of_vectors - j - 1];
3474 tree init;
3475 gimple_stmt_iterator gsi;
3476 if (place_after_defs)
3478 stmt_vec_info last_stmt_info
3479 = vect_find_last_scalar_stmt_in_slp (slp_node);
3480 gsi = gsi_for_stmt (last_stmt_info->stmt);
3481 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3482 &gsi);
3484 else
3485 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3486 NULL);
3487 if (ctor_seq != NULL)
3489 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3490 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3491 ctor_seq = NULL;
3493 voprnds.quick_push (init);
3494 place_after_defs = false;
3495 number_of_places_left_in_vector = nunits;
3496 constant_p = true;
3497 elts.new_vector (vector_type, nunits, 1);
3498 elts.quick_grow (nunits);
3503 /* Since the vectors are created in reverse order, reverse them here. */
3505 vec_num = voprnds.length ();
3506 for (j = vec_num; j != 0; j--)
3508 vop = voprnds[j - 1];
3509 vec_oprnds->quick_push (vop);
3512 voprnds.release ();
3514 /* In case VF is greater than the unrolling factor needed for the SLP
3515 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3516 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3517 to replicate the vectors. */
3518 while (number_of_vectors > vec_oprnds->length ())
3520 tree neutral_vec = NULL;
3522 if (neutral_op)
3524 if (!neutral_vec)
3525 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3527 vec_oprnds->quick_push (neutral_vec);
3529 else
3531 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3532 vec_oprnds->quick_push (vop);
3538 /* Collect the vectorized definitions, i.e. the LHSs of the vectorized
3539 def-stmts stored in SLP_NODE, into VEC_OPRNDS. */
3541 static void
3542 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3544 tree vec_oprnd;
3545 stmt_vec_info vec_def_stmt_info;
3546 unsigned int i;
3548 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3550 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3552 gcc_assert (vec_def_stmt_info);
3553 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3554 vec_oprnd = gimple_phi_result (vec_def_phi);
3555 else
3556 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3557 vec_oprnds->quick_push (vec_oprnd);
3562 /* Get vectorized definitions for SLP_NODE.
3563 If the scalar definitions are loop invariants or constants, collect them and
3564 call vect_get_constant_vectors() to create vector stmts.
3565 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3566 must be stored in the corresponding child of SLP_NODE, and we call
3567 vect_get_slp_vect_defs () to retrieve them. */
3569 void
3570 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3571 vec<vec<tree> > *vec_oprnds)
3573 int number_of_vects = 0, i;
3574 unsigned int child_index = 0;
3575 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3576 slp_tree child = NULL;
3577 vec<tree> vec_defs;
3578 tree oprnd;
3579 bool vectorized_defs;
3581 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3582 FOR_EACH_VEC_ELT (ops, i, oprnd)
3584 /* For each operand we check if it has vectorized definitions in a child
3585 node or we need to create them (for invariants and constants). We
3586 check if the LHS of the first stmt of the next child matches OPRND.
3587 If it does, we found the correct child. Otherwise, we call
3588 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3589 to check this child node for the next operand. */
3590 vectorized_defs = false;
3591 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3593 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3595 /* We have to check both pattern and original def, if available. */
3596 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3598 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3599 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3600 tree first_def_op;
3602 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3603 first_def_op = gimple_phi_result (first_def);
3604 else
3605 first_def_op = gimple_get_lhs (first_def_info->stmt);
3606 if (operand_equal_p (oprnd, first_def_op, 0)
3607 || (related
3608 && operand_equal_p (oprnd,
3609 gimple_get_lhs (related->stmt), 0)))
3611 /* The number of vector defs is determined by the number of
3612 vector statements in the node from which we get those
3613 statements. */
3614 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3615 vectorized_defs = true;
3616 child_index++;
3619 else
3620 child_index++;
3623 if (!vectorized_defs)
3625 if (i == 0)
3627 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3628 /* Number of vector stmts was calculated according to LHS in
3629 vect_schedule_slp_instance (), fix it by replacing LHS with
3630 RHS, if necessary. See vect_get_smallest_scalar_type () for
3631 details. */
3632 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3633 &rhs_size_unit);
3634 if (rhs_size_unit != lhs_size_unit)
3636 number_of_vects *= rhs_size_unit;
3637 number_of_vects /= lhs_size_unit;
3642 /* Allocate memory for vectorized defs. */
3643 vec_defs = vNULL;
3644 vec_defs.create (number_of_vects);
3646 /* For reduction defs we call vect_get_constant_vectors (), since we are
3647 looking for initial loop invariant values. */
3648 if (vectorized_defs)
3649 /* The defs are already vectorized. */
3650 vect_get_slp_vect_defs (child, &vec_defs);
3651 else
3652 /* Build vectors from scalar defs. */
3653 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3654 number_of_vects);
3656 vec_oprnds->quick_push (vec_defs);
3660 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3661 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3662 permute statements for the SLP node NODE of the SLP instance
3663 SLP_NODE_INSTANCE. */
3665 bool
3666 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3667 gimple_stmt_iterator *gsi, poly_uint64 vf,
3668 slp_instance slp_node_instance, bool analyze_only,
3669 unsigned *n_perms)
3671 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3672 vec_info *vinfo = stmt_info->vinfo;
3673 tree mask_element_type = NULL_TREE, mask_type;
3674 int vec_index = 0;
3675 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3676 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3677 unsigned int mask_element;
3678 machine_mode mode;
3679 unsigned HOST_WIDE_INT nunits, const_vf;
3681 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3682 return false;
3684 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3686 mode = TYPE_MODE (vectype);
3688 /* At the moment, all permutations are represented using per-element
3689 indices, so we can't cope with variable vector lengths or
3690 vectorization factors. */
3691 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3692 || !vf.is_constant (&const_vf))
3693 return false;
3695 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3696 same size as the vector element being permuted. */
3697 mask_element_type = lang_hooks.types.type_for_mode
3698 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3699 mask_type = get_vectype_for_scalar_type (mask_element_type);
3700 vec_perm_builder mask (nunits, nunits, 1);
3701 mask.quick_grow (nunits);
3702 vec_perm_indices indices;
3704 /* Initialize the vect stmts of NODE to properly insert the generated
3705 stmts later. */
3706 if (! analyze_only)
3707 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3708 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3709 SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE equals GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, so mask element values cannot be outside that range (they
     must be less than twice the vector size).
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;
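
  /* Scalar element I of the permuted access lives in input vector
     I / NUNITS, lane I % NUNITS.  A constant VEC_PERM_EXPR mask selects
     from the first operand for elements < NUNITS and from the second
     operand otherwise, which is why lanes taken from the second vector
     are biased by NUNITS below.  */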
  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                            + j * DR_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt_info->stmt, 0);
                }
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits && !noop_p)
            {
              indices.new_vector (mask, 2, nunits);
              if (!can_vec_perm_const_p (mode, indices))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        {
                          dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
                          dump_printf (MSG_MISSED_OPTIMIZATION, " ");
                        }
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  gcc_assert (analyze_only);
                  return false;
                }

              ++*n_perms;
            }

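          /* A full mask has been assembled; emit the permute statement for
             it (or reuse the input vector if it is a no-op) and start
             collecting the next mask.  */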
          if (index == nunits)
            {
              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  stmt_vec_info perm_stmt_info;
                  if (! noop_p)
                    {
                      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      gassign *perm_stmt
                        = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                               first_vec, second_vec,
                                               mask_vec);
                      perm_stmt_info
                        = vect_finish_stmt_generation (stmt_info, perm_stmt,
                                                       gsi);
                    }
                  else
                    /* If the mask was NULL_TREE, generate the requested
                       identity transform.  */
                    perm_stmt_info = vinfo->lookup_def (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
                    = perm_stmt_info;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}

/* Vectorize the SLP instance tree rooted at NODE, in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and can reuse
     their vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return false;
    }

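  /* Record NODE as the leader for this set of scalar stmts.  The key
     vector is copied so the map does not alias NODE's own stmt vector.  */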
  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push the SLP node def-type to the stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt, which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
        {
          gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
          if (gimple_assign_rhs_code (ostmt) != code0)
            {
              mask.quick_push (1);
              ocode = gimple_assign_rhs_code (ostmt);
            }
          else
            mask.quick_push (0);
        }
      if (ocode != ERROR_MARK)
        {
          vec<stmt_vec_info> v0;
          vec<stmt_vec_info> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
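
          /* Build a lane-merge selector: for each lane pick the element of
             V0 (the CODE0 results) or V1 (the OCODE results) according to
             the operation of the corresponding scalar stmt.  E.g. for a
             two-stmt group {plus, minus} and const_nunits == 4, mask is
             {0, 1} and the selector built below is {0, 5, 2, 7}: lanes 0
             and 2 come from V0, lanes 1 and 3 (offset by const_nunits)
             from V1.  */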
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets'
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]->stmt),
                                           gimple_assign_lhs (v1[j]->stmt),
                                           tmask);
              SLP_TREE_VEC_STMTS (node).quick_push
                (vect_finish_stmt_generation (stmt_info, vstmt, &si));
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }

  is_store = vect_transform_stmt (stmt_info, &si, &grouped_store, node,
                                  instance);

  /* Restore the stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }

  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */
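/* For example, a call "x_1 = foo (a_2);" whose SLP uses have all been
   vectorized is replaced by "x_1 = 0;", which keeps the definition of x_1
   valid until the scalar stmt is eventually removed.  */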

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
        continue;
      if (is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;
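
  /* A single map of already-vectorized scalar-stmt sets is shared across
     all instances, so a subtree that occurs in more than one SLP instance
     is vectorized only once and its vector stmts are reused.  */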
  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, bst_map);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (store_info))
            break;

          if (is_pattern_stmt_p (store_info))
            store_info = STMT_VINFO_RELATED_STMT (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);
        }
    }

  return is_store;
}