[19/46] Make vect_dr_stmt return a stmt_vec_info
[official-gcc.git] gcc/tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
54 static void
55 vect_free_slp_tree (slp_tree node, bool final_p)
57 int i;
58 slp_tree child;
60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
61 vect_free_slp_tree (child, final_p);
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
67 if (!final_p)
69 stmt_vec_info stmt_info;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
77 SLP_TREE_CHILDREN (node).release ();
78 SLP_TREE_SCALAR_STMTS (node).release ();
79 SLP_TREE_VEC_STMTS (node).release ();
80 SLP_TREE_LOAD_PERMUTATION (node).release ();
82 free (node);
86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
90 void
91 vect_free_slp_instance (slp_instance instance, bool final_p)
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
94 SLP_INSTANCE_LOADS (instance).release ();
95 free (instance);
99 /* Create an SLP node for SCALAR_STMTS. */
101 static slp_tree
102 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
104 slp_tree node;
105 stmt_vec_info stmt_info = scalar_stmts[0];
106 unsigned int nops;
108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
116 else if (is_a <gphi *> (stmt_info->stmt))
117 nops = 0;
118 else
119 return NULL;
121 node = XNEW (struct _slp_tree);
122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
123 SLP_TREE_VEC_STMTS (node).create (0);
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
125 SLP_TREE_CHILDREN (node).create (nops);
126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
127 SLP_TREE_TWO_OPERATORS (node) = false;
128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
130 unsigned i;
131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
132 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
134 return node;
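/* Editor's note: a worked example for the NOPS computation above, using a
   hypothetical scalar stmt.  For
       x_1 = a_2 < b_3 ? c_4 : d_5;
   gimple_num_ops is 4 (the lhs plus three rhs operands), so NOPS starts at
   3; the COND_EXPR adjustment raises it to 4 because the embedded
   comparison a_2 < b_3 contributes two separate child operands.  */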
138 /* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
140 node. */
141 typedef struct _slp_oprnd_info
143 /* Def-stmts for the operands. */
144 vec<stmt_vec_info> def_stmts;
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
147 stmt. */
148 tree first_op_type;
149 enum vect_def_type first_dt;
150 bool first_pattern;
151 bool second_pattern;
152 } *slp_oprnd_info;
155 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 operand. */
157 static vec<slp_oprnd_info>
158 vect_create_oprnd_info (int nops, int group_size)
160 int i;
161 slp_oprnd_info oprnd_info;
162 vec<slp_oprnd_info> oprnds_info;
164 oprnds_info.create (nops);
165 for (i = 0; i < nops; i++)
167 oprnd_info = XNEW (struct _slp_oprnd_info);
168 oprnd_info->def_stmts.create (group_size);
169 oprnd_info->first_dt = vect_uninitialized_def;
170 oprnd_info->first_op_type = NULL_TREE;
171 oprnd_info->first_pattern = false;
172 oprnd_info->second_pattern = false;
173 oprnds_info.quick_push (oprnd_info);
176 return oprnds_info;
180 /* Free operands info. */
182 static void
183 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
185 int i;
186 slp_oprnd_info oprnd_info;
188 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
190 oprnd_info->def_stmts.release ();
191 XDELETE (oprnd_info);
194 oprnds_info.release ();
198 /* Find the place of the data-ref in STMT in the interleaving chain that starts
199 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
201 int
202 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
204 gimple *next_stmt = first_stmt;
205 int result = 0;
207 if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
208 return -1;
210 do
212 if (next_stmt == stmt)
213 return result;
214 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
215 if (next_stmt)
216 result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
218 while (next_stmt);
220 return -1;
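/* Editor's note: an illustrative example for the function above, with
   hypothetical accesses.  For an interleaving chain built from loads of
   a[0], a[1] and a[3], the DR_GROUP_GAP values of the second and third
   elements are 1 and 2, so the places returned for the three stmts are
   0, 1 and 3 respectively.  */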
223 /* Check whether it is possible to load COUNT elements of type ELT_MODE
224 using the method implemented by duplicate_and_interleave. Return true
225 if so, returning the number of intermediate vectors in *NVECTORS_OUT
226 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
227 (if nonnull). */
229 bool
230 can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
231 unsigned int *nvectors_out,
232 tree *vector_type_out,
233 tree *permutes)
235 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
236 poly_int64 nelts;
237 unsigned int nvectors = 1;
238 for (;;)
240 scalar_int_mode int_mode;
241 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
242 if (multiple_p (current_vector_size, elt_bytes, &nelts)
243 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
245 tree int_type = build_nonstandard_integer_type
246 (GET_MODE_BITSIZE (int_mode), 1);
247 tree vector_type = build_vector_type (int_type, nelts);
248 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
250 vec_perm_builder sel1 (nelts, 2, 3);
251 vec_perm_builder sel2 (nelts, 2, 3);
252 poly_int64 half_nelts = exact_div (nelts, 2);
253 for (unsigned int i = 0; i < 3; ++i)
255 sel1.quick_push (i);
256 sel1.quick_push (i + nelts);
257 sel2.quick_push (half_nelts + i);
258 sel2.quick_push (half_nelts + i + nelts);
260 vec_perm_indices indices1 (sel1, 2, nelts);
261 vec_perm_indices indices2 (sel2, 2, nelts);
262 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
263 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
265 if (nvectors_out)
266 *nvectors_out = nvectors;
267 if (vector_type_out)
268 *vector_type_out = vector_type;
269 if (permutes)
271 permutes[0] = vect_gen_perm_mask_checked (vector_type,
272 indices1);
273 permutes[1] = vect_gen_perm_mask_checked (vector_type,
274 indices2);
276 return true;
280 if (!multiple_p (elt_bytes, 2, &elt_bytes))
281 return false;
282 nvectors *= 2;
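/* Editor's note: a worked example for the permute masks built above,
   assuming a constant NELTS of 4 and input vectors v0 and v1.  The
   encoded series expand to
     sel1 = { 0, 4, 1, 5 }  i.e. { v0[0], v1[0], v0[1], v1[1] }
     sel2 = { 2, 6, 3, 7 }  i.e. { v0[2], v1[2], v0[3], v1[3] }
   which interleave the low and high halves of the two inputs.  */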
286 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
287 they are of a valid type and that they match the defs of the first stmt of
288 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
289 by swapping operands of STMTS[STMT_NUM] when possible. A non-zero *SWAP
290 indicates that a swap is required for cond_expr stmts. Specifically, *SWAP
291 is 1 if STMT is a cond_expr and the operands of the comparison need to be
292 swapped; *SWAP is 2 if STMT is a cond_expr and the code of the comparison
293 needs to be inverted. If any operands were swapped in this function,
294 *SWAP is set to a non-zero value.
295 If there was a fatal error, return -1; if the error could be corrected by
296 swapping operands of the parent node of this one, return 1; if everything
297 is OK, return 0. */
298 static int
299 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
300 vec<stmt_vec_info> stmts, unsigned stmt_num,
301 vec<slp_oprnd_info> *oprnds_info)
303 stmt_vec_info stmt_info = stmts[stmt_num];
304 tree oprnd;
305 unsigned int i, number_of_oprnds;
306 enum vect_def_type dt = vect_uninitialized_def;
307 bool pattern = false;
308 slp_oprnd_info oprnd_info;
309 int first_op_idx = 1;
310 bool commutative = false;
311 bool first_op_cond = false;
312 bool first = stmt_num == 0;
313 bool second = stmt_num == 1;
315 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
317 number_of_oprnds = gimple_call_num_args (stmt);
318 first_op_idx = 3;
320 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
322 enum tree_code code = gimple_assign_rhs_code (stmt);
323 number_of_oprnds = gimple_num_ops (stmt) - 1;
324 /* Swap can only be done for cond_expr if asked to, otherwise we
325 could result in different comparison code to the first stmt. */
326 if (gimple_assign_rhs_code (stmt) == COND_EXPR
327 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
329 first_op_cond = true;
330 number_of_oprnds++;
332 else
333 commutative = commutative_tree_code (code);
335 else
336 return -1;
338 bool swapped = (*swap != 0);
339 gcc_assert (!swapped || first_op_cond);
340 for (i = 0; i < number_of_oprnds; i++)
342 again:
343 if (first_op_cond)
345 /* Map indicating how operands of cond_expr should be swapped. */
346 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
347 int *map = maps[*swap];
349 if (i < 2)
350 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
351 first_op_idx), map[i]);
352 else
353 oprnd = gimple_op (stmt_info->stmt, map[i]);
355 else
356 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
358 oprnd_info = (*oprnds_info)[i];
360 stmt_vec_info def_stmt_info;
361 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
363 if (dump_enabled_p ())
365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
366 "Build SLP failed: can't analyze def for ");
367 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
368 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
371 return -1;
374 /* Check if DEF_STMT_INFO is a part of a pattern in LOOP and get
375 the def stmt from the pattern. Check that all the stmts of the
376 node are in the pattern. */
377 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
379 pattern = true;
380 if (!first && !oprnd_info->first_pattern
381 /* Allow different pattern state for the defs of the
382 first stmt in reduction chains. */
383 && (oprnd_info->first_dt != vect_reduction_def
384 || (!second && !oprnd_info->second_pattern)))
386 if (i == 0
387 && !swapped
388 && commutative)
390 swapped = true;
391 goto again;
394 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
397 "Build SLP failed: some of the stmts"
398 " are in a pattern, and others are not ");
399 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
400 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
403 return 1;
406 dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
408 if (dt == vect_unknown_def_type)
410 if (dump_enabled_p ())
411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
412 "Unsupported pattern.\n");
413 return -1;
416 switch (gimple_code (def_stmt_info->stmt))
418 case GIMPLE_PHI:
419 case GIMPLE_ASSIGN:
420 break;
422 default:
423 if (dump_enabled_p ())
424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
425 "unsupported defining stmt:\n");
426 return -1;
430 if (second)
431 oprnd_info->second_pattern = pattern;
433 if (first)
435 oprnd_info->first_dt = dt;
436 oprnd_info->first_pattern = pattern;
437 oprnd_info->first_op_type = TREE_TYPE (oprnd);
439 else
441 /* Not first stmt of the group, check that the def-stmt/s match
442 the def-stmt/s of the first stmt. Allow different definition
443 types for reduction chains: the first stmt must be a
444 vect_reduction_def (a phi node), and the rest
445 vect_internal_def. */
446 tree type = TREE_TYPE (oprnd);
447 if ((oprnd_info->first_dt != dt
448 && !(oprnd_info->first_dt == vect_reduction_def
449 && dt == vect_internal_def)
450 && !((oprnd_info->first_dt == vect_external_def
451 || oprnd_info->first_dt == vect_constant_def)
452 && (dt == vect_external_def
453 || dt == vect_constant_def)))
454 || !types_compatible_p (oprnd_info->first_op_type, type))
456 /* Try swapping operands if we got a mismatch. */
457 if (i == 0
458 && !swapped
459 && commutative)
461 swapped = true;
462 goto again;
465 if (dump_enabled_p ())
466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
467 "Build SLP failed: different types\n");
469 return 1;
471 if ((dt == vect_constant_def
472 || dt == vect_external_def)
473 && !current_vector_size.is_constant ()
474 && (TREE_CODE (type) == BOOLEAN_TYPE
475 || !can_duplicate_and_interleave_p (stmts.length (),
476 TYPE_MODE (type))))
478 if (dump_enabled_p ())
480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
481 "Build SLP failed: invalid type of def "
482 "for variable-length SLP ");
483 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
484 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
486 return -1;
490 /* Check the types of the definitions. */
491 switch (dt)
493 case vect_constant_def:
494 case vect_external_def:
495 break;
497 case vect_reduction_def:
498 case vect_induction_def:
499 case vect_internal_def:
500 oprnd_info->def_stmts.quick_push (def_stmt_info);
501 break;
503 default:
504 /* FORNOW: Not supported. */
505 if (dump_enabled_p ())
507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
508 "Build SLP failed: illegal type of def ");
509 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
510 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
513 return -1;
517 /* Swap operands. */
518 if (swapped)
520 /* If there are already uses of this stmt in a SLP instance then
521 we've committed to the operand order and can't swap it. */
522 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
524 if (dump_enabled_p ())
526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
527 "Build SLP failed: cannot swap operands of "
528 "shared stmt ");
529 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
530 stmt_info->stmt, 0);
532 return -1;
535 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
536 if (first_op_cond)
538 tree cond = gimple_assign_rhs1 (stmt);
539 enum tree_code code = TREE_CODE (cond);
541 /* Swap. */
542 if (*swap == 1)
544 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
545 &TREE_OPERAND (cond, 1));
546 TREE_SET_CODE (cond, swap_tree_comparison (code));
548 /* Invert. */
549 else
551 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
552 gimple_assign_rhs3_ptr (stmt));
553 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
554 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
555 gcc_assert (code != ERROR_MARK);
556 TREE_SET_CODE (cond, code);
559 else
560 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
561 gimple_assign_rhs2_ptr (stmt));
562 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "swapped operands to match def types in ");
566 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
570 *swap = swapped;
571 return 0;
574 /* Return true if call statements CALL1 and CALL2 are similar enough
575 to be combined into the same SLP group. */
577 static bool
578 compatible_calls_p (gcall *call1, gcall *call2)
580 unsigned int nargs = gimple_call_num_args (call1);
581 if (nargs != gimple_call_num_args (call2))
582 return false;
584 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
585 return false;
587 if (gimple_call_internal_p (call1))
589 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
590 TREE_TYPE (gimple_call_lhs (call2))))
591 return false;
592 for (unsigned int i = 0; i < nargs; ++i)
593 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
594 TREE_TYPE (gimple_call_arg (call2, i))))
595 return false;
597 else
599 if (!operand_equal_p (gimple_call_fn (call1),
600 gimple_call_fn (call2), 0))
601 return false;
603 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
604 return false;
606 return true;
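/* Editor's note: an illustrative consequence of the checks above.  Two
   internal IFN_SQRT calls whose lhs and argument types are compatible may
   share an SLP group, while calls with different combined functions,
   argument counts or fntypes may not.  */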
609 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
610 caller's attempt to find the vector type in STMT with the narrowest
611 element type. Return true if VECTYPE is nonnull and if it is valid
612 for VINFO. When returning true, update MAX_NUNITS to reflect the
613 number of units in VECTYPE. VINFO, GROUP_SIZE and MAX_NUNITS are
614 as for vect_build_slp_tree. */
616 static bool
617 vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
618 tree vectype, poly_uint64 *max_nunits)
620 if (!vectype)
622 if (dump_enabled_p ())
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
625 "Build SLP failed: unsupported data-type in ");
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
627 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
629 /* Fatal mismatch. */
630 return false;
633 /* If populating the vector type requires unrolling then fail
634 before adjusting *max_nunits for basic-block vectorization. */
635 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
636 unsigned HOST_WIDE_INT const_nunits;
637 if (is_a <bb_vec_info> (vinfo)
638 && (!nunits.is_constant (&const_nunits)
639 || const_nunits > group_size))
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "Build SLP failed: unrolling required "
643 "in basic block SLP\n");
644 /* Fatal mismatch. */
645 return false;
648 /* In case of multiple types we need to detect the smallest type. */
649 vect_update_max_nunits (max_nunits, vectype);
650 return true;
653 /* STMTS is a group of GROUP_SIZE SLP statements in which some
654 statements do the same operation as the first statement and in which
655 the others do ALT_STMT_CODE. Return true if we can take one vector
656 of the first operation and one vector of the second and permute them
657 to get the required result. VECTYPE is the type of the vector that
658 would be permuted. */
660 static bool
661 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
662 unsigned int group_size, tree vectype,
663 tree_code alt_stmt_code)
665 unsigned HOST_WIDE_INT count;
666 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
667 return false;
669 vec_perm_builder sel (count, count, 1);
670 for (unsigned int i = 0; i < count; ++i)
672 unsigned int elt = i;
673 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
674 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
675 elt += count;
676 sel.quick_push (elt);
678 vec_perm_indices indices (sel, 2, count);
679 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
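/* Editor's note: a worked example for the mask built above.  For a
   two-stmt group { a + b, a - b } and a 4-element vector type, the mask
   is { 0, 5, 2, 7 }: even lanes come from the vector of additions and odd
   lanes from the vector of subtractions (the usual addsub pattern).  */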
682 /* Verify whether the scalar stmts STMTS are isomorphic, do not require
683 data permutation, and are not of unsupported types of operation.
684 Return true if they are, otherwise return false and indicate in
685 *MATCHES which stmts are not isomorphic to the first one. If
686 MATCHES[0] is false then this indicates that the comparison could not
687 be carried out or that the stmts will never be vectorized by SLP.
689 Note COND_EXPR is possibly isomorphic to another one after swapping its
690 operands. Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to
691 the first stmt by swapping the two operands of the comparison; set SWAP[i]
692 to 2 if stmt I is isomorphic to the first stmt by inverting the code
693 of the comparison. Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
694 to (B1 <= A1 ? X1 : Y1) or inverted to (A1 < B1) ? Y1 : X1. */
696 static bool
697 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
698 vec<stmt_vec_info> stmts, unsigned int group_size,
699 poly_uint64 *max_nunits, bool *matches,
700 bool *two_operators)
702 unsigned int i;
703 stmt_vec_info first_stmt_info = stmts[0];
704 enum tree_code first_stmt_code = ERROR_MARK;
705 enum tree_code alt_stmt_code = ERROR_MARK;
706 enum tree_code rhs_code = ERROR_MARK;
707 enum tree_code first_cond_code = ERROR_MARK;
708 tree lhs;
709 bool need_same_oprnds = false;
710 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
711 optab optab;
712 int icode;
713 machine_mode optab_op2_mode;
714 machine_mode vec_mode;
715 gimple *first_load = NULL, *prev_first_load = NULL;
717 /* For every stmt in NODE find its def stmt/s. */
718 stmt_vec_info stmt_info;
719 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
721 gimple *stmt = stmt_info->stmt;
722 swap[i] = 0;
723 matches[i] = false;
725 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
728 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
731 /* Fail to vectorize statements marked as unvectorizable. */
732 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
734 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "Build SLP failed: unvectorizable statement ");
738 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
740 /* Fatal mismatch. */
741 matches[0] = false;
742 return false;
745 lhs = gimple_get_lhs (stmt);
746 if (lhs == NULL_TREE)
748 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "Build SLP failed: not GIMPLE_ASSIGN nor "
752 "GIMPLE_CALL ");
753 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
755 /* Fatal mismatch. */
756 matches[0] = false;
757 return false;
760 tree nunits_vectype;
761 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
762 &nunits_vectype)
763 || (nunits_vectype
764 && !vect_record_max_nunits (vinfo, stmt_info, group_size,
765 nunits_vectype, max_nunits)))
767 /* Fatal mismatch. */
768 matches[0] = false;
769 return false;
772 gcc_assert (vectype);
774 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
776 rhs_code = CALL_EXPR;
777 if ((gimple_call_internal_p (call_stmt)
778 && (!vectorizable_internal_fn_p
779 (gimple_call_internal_fn (call_stmt))))
780 || gimple_call_tail_p (call_stmt)
781 || gimple_call_noreturn_p (call_stmt)
782 || !gimple_call_nothrow_p (call_stmt)
783 || gimple_call_chain (call_stmt))
785 if (dump_enabled_p ())
787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
788 "Build SLP failed: unsupported call type ");
789 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
790 call_stmt, 0);
792 /* Fatal mismatch. */
793 matches[0] = false;
794 return false;
797 else
798 rhs_code = gimple_assign_rhs_code (stmt);
800 /* Check the operation. */
801 if (i == 0)
803 first_stmt_code = rhs_code;
805 /* Shift arguments should be equal in all the packed stmts for a
806 vector shift with scalar shift operand. */
807 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
808 || rhs_code == LROTATE_EXPR
809 || rhs_code == RROTATE_EXPR)
811 if (vectype == boolean_type_node)
813 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
815 "Build SLP failed: shift of a"
816 " boolean.\n");
817 /* Fatal mismatch. */
818 matches[0] = false;
819 return false;
822 vec_mode = TYPE_MODE (vectype);
824 /* First see if we have a vector/vector shift. */
825 optab = optab_for_tree_code (rhs_code, vectype,
826 optab_vector);
828 if (!optab
829 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
831 /* No vector/vector shift, try for a vector/scalar shift. */
832 optab = optab_for_tree_code (rhs_code, vectype,
833 optab_scalar);
835 if (!optab)
837 if (dump_enabled_p ())
838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
839 "Build SLP failed: no optab.\n");
840 /* Fatal mismatch. */
841 matches[0] = false;
842 return false;
844 icode = (int) optab_handler (optab, vec_mode);
845 if (icode == CODE_FOR_nothing)
847 if (dump_enabled_p ())
848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
849 "Build SLP failed: "
850 "op not supported by target.\n");
851 /* Fatal mismatch. */
852 matches[0] = false;
853 return false;
855 optab_op2_mode = insn_data[icode].operand[2].mode;
856 if (!VECTOR_MODE_P (optab_op2_mode))
858 need_same_oprnds = true;
859 first_op1 = gimple_assign_rhs2 (stmt);
863 else if (rhs_code == WIDEN_LSHIFT_EXPR)
865 need_same_oprnds = true;
866 first_op1 = gimple_assign_rhs2 (stmt);
869 else
871 if (first_stmt_code != rhs_code
872 && alt_stmt_code == ERROR_MARK)
873 alt_stmt_code = rhs_code;
874 if (first_stmt_code != rhs_code
875 && (first_stmt_code != IMAGPART_EXPR
876 || rhs_code != REALPART_EXPR)
877 && (first_stmt_code != REALPART_EXPR
878 || rhs_code != IMAGPART_EXPR)
879 /* Handle mismatches in plus/minus by computing both
880 and merging the results. */
881 && !((first_stmt_code == PLUS_EXPR
882 || first_stmt_code == MINUS_EXPR)
883 && (alt_stmt_code == PLUS_EXPR
884 || alt_stmt_code == MINUS_EXPR)
885 && rhs_code == alt_stmt_code)
886 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
887 && (first_stmt_code == ARRAY_REF
888 || first_stmt_code == BIT_FIELD_REF
889 || first_stmt_code == INDIRECT_REF
890 || first_stmt_code == COMPONENT_REF
891 || first_stmt_code == MEM_REF)))
893 if (dump_enabled_p ())
895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
896 "Build SLP failed: different operation "
897 "in stmt ");
898 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
900 "original stmt ");
901 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
902 first_stmt_info->stmt, 0);
904 /* Mismatch. */
905 continue;
908 if (need_same_oprnds
909 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
911 if (dump_enabled_p ())
913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
914 "Build SLP failed: different shift "
915 "arguments in ");
916 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
918 /* Mismatch. */
919 continue;
922 if (rhs_code == CALL_EXPR)
924 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
925 as_a <gcall *> (stmt)))
927 if (dump_enabled_p ())
929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
930 "Build SLP failed: different calls in ");
931 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
932 stmt, 0);
934 /* Mismatch. */
935 continue;
940 /* Grouped store or load. */
941 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
943 if (REFERENCE_CLASS_P (lhs))
945 /* Store. */
948 else
950 /* Load. */
951 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
952 if (prev_first_load)
954 /* Check that there are no loads from different interleaving
955 chains in the same node. */
956 if (prev_first_load != first_load)
958 if (dump_enabled_p ())
960 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
961 vect_location,
962 "Build SLP failed: different "
963 "interleaving chains in one node ");
964 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
965 stmt, 0);
967 /* Mismatch. */
968 continue;
971 else
972 prev_first_load = first_load;
974 } /* Grouped access. */
975 else
977 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
979 /* Not grouped load. */
980 if (dump_enabled_p ())
982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
983 "Build SLP failed: not grouped load ");
984 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
987 /* FORNOW: Not grouped loads are not supported. */
988 /* Fatal mismatch. */
989 matches[0] = false;
990 return false;
993 /* Not memory operation. */
994 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
995 && TREE_CODE_CLASS (rhs_code) != tcc_unary
996 && TREE_CODE_CLASS (rhs_code) != tcc_expression
997 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
998 && rhs_code != CALL_EXPR)
1000 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1003 "Build SLP failed: operation");
1004 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
1005 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1007 /* Fatal mismatch. */
1008 matches[0] = false;
1009 return false;
1012 if (rhs_code == COND_EXPR)
1014 tree cond_expr = gimple_assign_rhs1 (stmt);
1015 enum tree_code cond_code = TREE_CODE (cond_expr);
1016 enum tree_code swap_code = ERROR_MARK;
1017 enum tree_code invert_code = ERROR_MARK;
1019 if (i == 0)
1020 first_cond_code = TREE_CODE (cond_expr);
1021 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1023 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1024 swap_code = swap_tree_comparison (cond_code);
1025 invert_code = invert_tree_comparison (cond_code, honor_nans);
1028 if (first_cond_code == cond_code)
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code == swap_code)
1032 swap[i] = 1;
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code == invert_code)
1035 swap[i] = 2;
1036 else
1038 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1041 "Build SLP failed: different"
1042 " operation");
1043 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1044 stmt, 0);
1046 /* Mismatch. */
1047 continue;
1052 matches[i] = true;
1055 for (i = 0; i < group_size; ++i)
1056 if (!matches[i])
1057 return false;
1059 /* If we allowed a two-operation SLP node verify the target can cope
1060 with the permute we are going to use. */
1061 if (alt_stmt_code != ERROR_MARK
1062 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1064 if (vectype == boolean_type_node
1065 || !vect_two_operations_perm_ok_p (stmts, group_size,
1066 vectype, alt_stmt_code))
1068 for (i = 0; i < group_size; ++i)
1069 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1071 matches[i] = false;
1072 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1075 "Build SLP failed: different operation "
1076 "in stmt ");
1077 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1078 stmts[i]->stmt, 0);
1079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1080 "original stmt ");
1081 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1082 first_stmt_info->stmt, 0);
1085 return false;
1087 *two_operators = true;
1090 return true;
1093 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1094 Note we never remove apart from at destruction time so we do not
1095 need a special value for deleted that differs from empty. */
1096 struct bst_traits
1098 typedef vec <stmt_vec_info> value_type;
1099 typedef vec <stmt_vec_info> compare_type;
1100 static inline hashval_t hash (value_type);
1101 static inline bool equal (value_type existing, value_type candidate);
1102 static inline bool is_empty (value_type x) { return !x.exists (); }
1103 static inline bool is_deleted (value_type x) { return !x.exists (); }
1104 static inline void mark_empty (value_type &x) { x.release (); }
1105 static inline void mark_deleted (value_type &x) { x.release (); }
1106 static inline void remove (value_type &x) { x.release (); }
1108 inline hashval_t
1109 bst_traits::hash (value_type x)
1111 inchash::hash h;
1112 for (unsigned i = 0; i < x.length (); ++i)
1113 h.add_int (gimple_uid (x[i]->stmt));
1114 return h.end ();
1116 inline bool
1117 bst_traits::equal (value_type existing, value_type candidate)
1119 if (existing.length () != candidate.length ())
1120 return false;
1121 for (unsigned i = 0; i < existing.length (); ++i)
1122 if (existing[i] != candidate[i])
1123 return false;
1124 return true;
1127 typedef hash_set <vec <stmt_vec_info>, bst_traits> scalar_stmts_set_t;
1128 static scalar_stmts_set_t *bst_fail;
1130 typedef hash_map <vec <stmt_vec_info>, slp_tree,
1131 simple_hashmap_traits <bst_traits, slp_tree> >
1132 scalar_stmts_to_slp_tree_map_t;
1134 static slp_tree
1135 vect_build_slp_tree_2 (vec_info *vinfo,
1136 vec<stmt_vec_info> stmts, unsigned int group_size,
1137 poly_uint64 *max_nunits,
1138 vec<slp_tree> *loads,
1139 bool *matches, unsigned *npermutes, unsigned *tree_size,
1140 unsigned max_tree_size);
1142 static slp_tree
1143 vect_build_slp_tree (vec_info *vinfo,
1144 vec<stmt_vec_info> stmts, unsigned int group_size,
1145 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1146 bool *matches, unsigned *npermutes, unsigned *tree_size,
1147 unsigned max_tree_size)
1149 if (bst_fail->contains (stmts))
1150 return NULL;
1151 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1152 loads, matches, npermutes, tree_size,
1153 max_tree_size);
1154 /* When the SLP build fails for STMTS, record this; otherwise the SLP
1155 build can take exponential time when we are allowed to construct
1156 parts from scalars, see PR81723. */
1157 if (! res)
1159 vec <stmt_vec_info> x;
1160 x.create (stmts.length ());
1161 x.splice (stmts);
1162 bst_fail->add (x);
1164 return res;
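/* Editor's note: the copy made above (x.create plus x.splice) appears to
   be needed because bst_fail owns its keys: the caller's STMTS vector is
   owned elsewhere, while the memoized failure has to survive until the
   hash set itself is destroyed.  */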
1167 /* Recursively build an SLP tree starting from NODE.
1168 Fail (and return NULL) if def-stmts are not isomorphic, require
1169 data permutation or are of unsupported types of operation.
1170 Otherwise, return the newly built SLP tree. MATCHES indicates
1171 which scalar stmts failed to match the first one. */
1174 static slp_tree
1175 vect_build_slp_tree_2 (vec_info *vinfo,
1176 vec<stmt_vec_info> stmts, unsigned int group_size,
1177 poly_uint64 *max_nunits,
1178 vec<slp_tree> *loads,
1179 bool *matches, unsigned *npermutes, unsigned *tree_size,
1180 unsigned max_tree_size)
1182 unsigned nops, i, this_tree_size = 0;
1183 poly_uint64 this_max_nunits = *max_nunits;
1184 slp_tree node;
1186 matches[0] = false;
1188 stmt_vec_info stmt_info = stmts[0];
1189 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1190 nops = gimple_call_num_args (stmt);
1191 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1193 nops = gimple_num_ops (stmt) - 1;
1194 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1195 nops++;
1197 else if (is_a <gphi *> (stmt_info->stmt))
1198 nops = 0;
1199 else
1200 return NULL;
1202 /* If the SLP node is a PHI (induction or reduction), terminate
1203 the recursion. */
1204 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1206 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1207 tree vectype = get_vectype_for_scalar_type (scalar_type);
1208 if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
1209 max_nunits))
1210 return NULL;
1212 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1213 /* Induction from different IVs is not supported. */
1214 if (def_type == vect_induction_def)
1216 stmt_vec_info other_info;
1217 FOR_EACH_VEC_ELT (stmts, i, other_info)
1218 if (stmt_info != other_info)
1219 return NULL;
1221 else
1223 /* Else def types have to match. */
1224 stmt_vec_info other_info;
1225 FOR_EACH_VEC_ELT (stmts, i, other_info)
1227 /* But for reduction chains only check on the first stmt. */
1228 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1229 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
1230 continue;
1231 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1232 return NULL;
1235 node = vect_create_new_slp_node (stmts);
1236 return node;
1240 bool two_operators = false;
1241 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1242 if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
1243 &this_max_nunits, matches, &two_operators))
1244 return NULL;
1246 /* If the SLP node is a load, terminate the recursion. */
1247 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1248 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1250 *max_nunits = this_max_nunits;
1251 node = vect_create_new_slp_node (stmts);
1252 loads->safe_push (node);
1253 return node;
1256 /* Get at the operands, verifying they are compatible. */
1257 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1258 slp_oprnd_info oprnd_info;
1259 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1261 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1262 stmts, i, &oprnds_info);
1263 if (res != 0)
1264 matches[(res == -1) ? 0 : i] = false;
1265 if (!matches[0])
1266 break;
1268 for (i = 0; i < group_size; ++i)
1269 if (!matches[i])
1271 vect_free_oprnd_info (oprnds_info);
1272 return NULL;
1275 auto_vec<slp_tree, 4> children;
1276 auto_vec<slp_tree> this_loads;
1278 stmt_info = stmts[0];
1280 if (tree_size)
1281 max_tree_size -= *tree_size;
1283 /* Create SLP_TREE nodes for the definition node/s. */
1284 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1286 slp_tree child;
1287 unsigned old_nloads = this_loads.length ();
1288 unsigned old_tree_size = this_tree_size;
1289 unsigned int j;
1291 if (oprnd_info->first_dt != vect_internal_def
1292 && oprnd_info->first_dt != vect_reduction_def
1293 && oprnd_info->first_dt != vect_induction_def)
1294 continue;
1296 if (++this_tree_size > max_tree_size)
1298 FOR_EACH_VEC_ELT (children, j, child)
1299 vect_free_slp_tree (child, false);
1300 vect_free_oprnd_info (oprnds_info);
1301 return NULL;
1304 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1305 group_size, &this_max_nunits,
1306 &this_loads, matches, npermutes,
1307 &this_tree_size,
1308 max_tree_size)) != NULL)
1310 /* If we have all children of CHILD built up from scalars then just
1311 throw that away and build this node up from scalars. */
1312 if (!SLP_TREE_CHILDREN (child).is_empty ()
1313 /* ??? Rejecting patterns this way doesn't work. We'd have to
1314 do extra work to cancel the pattern so the uses see the
1315 scalar version. */
1316 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1318 slp_tree grandchild;
1320 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1321 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1322 break;
1323 if (!grandchild)
1325 /* Roll back. */
1326 this_loads.truncate (old_nloads);
1327 this_tree_size = old_tree_size;
1328 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1329 vect_free_slp_tree (grandchild, false);
1330 SLP_TREE_CHILDREN (child).truncate (0);
1332 dump_printf_loc (MSG_NOTE, vect_location,
1333 "Building parent vector operands from "
1334 "scalars instead\n");
1335 oprnd_info->def_stmts = vNULL;
1336 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1337 children.safe_push (child);
1338 continue;
1342 oprnd_info->def_stmts = vNULL;
1343 children.safe_push (child);
1344 continue;
1347 /* If the SLP build failed fatally and we analyze a basic-block
1348 simply treat nodes we fail to build as externally defined
1349 (and thus build vectors from the scalar defs).
1350 The cost model will reject outright expensive cases.
1351 ??? This doesn't treat cases where permutation ultimately
1352 fails (or we don't try permutation below). Ideally we'd
1353 even compute a permutation that will end up with the maximum
1354 SLP tree size... */
1355 if (is_a <bb_vec_info> (vinfo)
1356 && !matches[0]
1357 /* ??? Rejecting patterns this way doesn't work. We'd have to
1358 do extra work to cancel the pattern so the uses see the
1359 scalar version. */
1360 && !is_pattern_stmt_p (stmt_info))
1362 dump_printf_loc (MSG_NOTE, vect_location,
1363 "Building vector operands from scalars\n");
1364 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1365 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1366 children.safe_push (child);
1367 oprnd_info->def_stmts = vNULL;
1368 continue;
1371 /* If the SLP build for operand zero failed and operands zero
1372 and one can be commuted, try that for the scalar stmts
1373 that failed the match. */
1374 if (i == 0
1375 /* A first scalar stmt mismatch signals a fatal mismatch. */
1376 && matches[0]
1377 /* ??? For COND_EXPRs we can swap the comparison operands
1378 as well as the arms under some constraints. */
1379 && nops == 2
1380 && oprnds_info[1]->first_dt == vect_internal_def
1381 && is_gimple_assign (stmt_info->stmt)
1382 /* Do so only if the number of unsuccessful permutes was no more
1383 than a cut-off, as re-trying the recursive match on
1384 possibly each level of the tree would expose exponential
1385 behavior. */
1386 && *npermutes < 4)
1388 /* See whether we can swap the matching or the non-matching
1389 stmt operands. */
1390 bool swap_not_matching = true;
1393 for (j = 0; j < group_size; ++j)
1395 if (matches[j] != !swap_not_matching)
1396 continue;
1397 stmt_vec_info stmt_info = stmts[j];
1398 /* Verify if we can swap operands of this stmt. */
1399 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1400 if (!stmt
1401 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1403 if (!swap_not_matching)
1404 goto fail;
1405 swap_not_matching = false;
1406 break;
1408 /* Verify if we can safely swap or if we committed to a
1409 specific operand order already.
1410 ??? Instead of modifying GIMPLE stmts here we could
1411 record whether we want to swap operands in the SLP
1412 node and temporarily do that when processing it
1413 (or wrap operand accessors in a helper). */
1414 else if (swap[j] != 0
1415 || STMT_VINFO_NUM_SLP_USES (stmt_info))
1417 if (!swap_not_matching)
1419 if (dump_enabled_p ())
1421 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1422 vect_location,
1423 "Build SLP failed: cannot swap "
1424 "operands of shared stmt ");
1425 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
1426 TDF_SLIM, stmts[j]->stmt, 0);
1428 goto fail;
1430 swap_not_matching = false;
1431 break;
1435 while (j != group_size);
1437 /* Swap mismatched definition stmts. */
1438 dump_printf_loc (MSG_NOTE, vect_location,
1439 "Re-trying with swapped operands of stmts ");
1440 for (j = 0; j < group_size; ++j)
1441 if (matches[j] == !swap_not_matching)
1443 std::swap (oprnds_info[0]->def_stmts[j],
1444 oprnds_info[1]->def_stmts[j]);
1445 dump_printf (MSG_NOTE, "%d ", j);
1447 dump_printf (MSG_NOTE, "\n");
1448 /* And try again with scratch 'matches' ... */
1449 bool *tem = XALLOCAVEC (bool, group_size);
1450 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1451 group_size, &this_max_nunits,
1452 &this_loads, tem, npermutes,
1453 &this_tree_size,
1454 max_tree_size)) != NULL)
1456 /* ... so if successful we can apply the operand swapping
1457 to the GIMPLE IL. This is necessary because for example
1458 vect_get_slp_defs uses operand indexes and thus expects
1459 canonical operand order. This is also necessary even
1460 if we end up building the operand from scalars as
1461 we'll continue to process swapped operand two. */
1462 for (j = 0; j < group_size; ++j)
1463 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
1464 for (j = 0; j < group_size; ++j)
1465 if (matches[j] == !swap_not_matching)
1467 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1468 /* Avoid swapping operands twice. */
1469 if (gimple_plf (stmt, GF_PLF_1))
1470 continue;
1471 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1472 gimple_assign_rhs2_ptr (stmt));
1473 gimple_set_plf (stmt, GF_PLF_1, true);
1475 /* Verify we swap all duplicates or none. */
1476 if (flag_checking)
1477 for (j = 0; j < group_size; ++j)
1478 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1479 == (matches[j] == !swap_not_matching));
1481 /* If we have all children of CHILD built up from scalars then
1482 just throw that away and build this node up from scalars. */
1483 if (!SLP_TREE_CHILDREN (child).is_empty ()
1484 /* ??? Rejecting patterns this way doesn't work. We'd have
1485 to do extra work to cancel the pattern so the uses see the
1486 scalar version. */
1487 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1489 unsigned int j;
1490 slp_tree grandchild;
1492 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1493 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1494 break;
1495 if (!grandchild)
1497 /* Roll back. */
1498 this_loads.truncate (old_nloads);
1499 this_tree_size = old_tree_size;
1500 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1501 vect_free_slp_tree (grandchild, false);
1502 SLP_TREE_CHILDREN (child).truncate (0);
1504 dump_printf_loc (MSG_NOTE, vect_location,
1505 "Building parent vector operands from "
1506 "scalars instead\n");
1507 oprnd_info->def_stmts = vNULL;
1508 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1509 children.safe_push (child);
1510 continue;
1514 oprnd_info->def_stmts = vNULL;
1515 children.safe_push (child);
1516 continue;
1519 ++*npermutes;
1522 fail:
1523 gcc_assert (child == NULL);
1524 FOR_EACH_VEC_ELT (children, j, child)
1525 vect_free_slp_tree (child, false);
1526 vect_free_oprnd_info (oprnds_info);
1527 return NULL;
1530 vect_free_oprnd_info (oprnds_info);
1532 if (tree_size)
1533 *tree_size += this_tree_size;
1534 *max_nunits = this_max_nunits;
1535 loads->safe_splice (this_loads);
1537 node = vect_create_new_slp_node (stmts);
1538 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1539 SLP_TREE_CHILDREN (node).splice (children);
1540 return node;
1543 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1545 static void
1546 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1547 slp_tree node)
1549 int i;
1550 stmt_vec_info stmt_info;
1551 slp_tree child;
1553 dump_printf_loc (dump_kind, loc, "node%s\n",
1554 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1555 ? " (external)" : "");
1556 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1558 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1559 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt_info->stmt, 0);
1561 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1562 vect_print_slp_tree (dump_kind, loc, child);
1566 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1567 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1568 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1569 stmts in NODE are to be marked. */
1571 static void
1572 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1574 int i;
1575 stmt_vec_info stmt_info;
1576 slp_tree child;
1578 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1579 return;
1581 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1582 if (j < 0 || i == j)
1583 STMT_SLP_TYPE (stmt_info) = mark;
1585 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1586 vect_mark_slp_stmts (child, mark, j);
1590 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1592 static void
1593 vect_mark_slp_stmts_relevant (slp_tree node)
1595 int i;
1596 stmt_vec_info stmt_info;
1597 slp_tree child;
1599 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1600 return;
1602 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1604 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1605 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1606 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1609 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1610 vect_mark_slp_stmts_relevant (child);
1614 /* Rearrange the statements of NODE according to PERMUTATION. */
1616 static void
1617 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1618 vec<unsigned> permutation)
1620 stmt_vec_info stmt_info;
1621 vec<stmt_vec_info> tmp_stmts;
1622 unsigned int i;
1623 slp_tree child;
1625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1626 vect_slp_rearrange_stmts (child, group_size, permutation);
1628 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1629 tmp_stmts.create (group_size);
1630 tmp_stmts.quick_grow_cleared (group_size);
1632 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1633 tmp_stmts[permutation[i]] = stmt_info;
1635 SLP_TREE_SCALAR_STMTS (node).release ();
1636 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1640 /* Attempt to reorder stmts in a reduction chain so that we don't
1641 require any load permutation. Return true if that was possible,
1642 otherwise return false. */
1644 static bool
1645 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1647 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1648 unsigned int i, j;
1649 unsigned int lidx;
1650 slp_tree node, load;
1652 /* Compare all the permutation sequences to the first one. We know
1653 that at least one load is permuted. */
1654 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1655 if (!node->load_permutation.exists ())
1656 return false;
1657 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1659 if (!load->load_permutation.exists ())
1660 return false;
1661 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1662 if (lidx != node->load_permutation[j])
1663 return false;
1666 /* Check that the loads in the first sequence are different and there
1667 are no gaps between them. */
1668 auto_sbitmap load_index (group_size);
1669 bitmap_clear (load_index);
1670 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1672 if (lidx >= group_size)
1673 return false;
1674 if (bitmap_bit_p (load_index, lidx))
1675 return false;
1677 bitmap_set_bit (load_index, lidx);
1679 for (i = 0; i < group_size; i++)
1680 if (!bitmap_bit_p (load_index, i))
1681 return false;
1683 /* This permutation is valid for reduction. Since the order of the
1684 statements in the nodes is not important unless they are memory
1685 accesses, we can rearrange the statements in all the nodes
1686 according to the order of the loads. */
1687 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1688 node->load_permutation);
1690 /* We are done, no actual permutations need to be generated. */
1691 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1692 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1694 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1695 first_stmt_info
1696 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (first_stmt_info));
1697 /* But we have to keep those permutations that are required because
1698 of handling of gaps. */
1699 if (known_eq (unrolling_factor, 1U)
1700 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1701 && DR_GROUP_GAP (first_stmt_info) == 0))
1702 SLP_TREE_LOAD_PERMUTATION (node).release ();
1703 else
1704 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1705 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1708 return true;
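/* Editor's note: an illustrative example for the rearrangement above.
   If every load node of a four-stmt reduction instance carries the load
   permutation { 2, 0, 3, 1 }, the scalar stmts of all nodes are reordered
   according to that permutation; the loads then appear in
   interleaving-chain order, so the permutations can be released (or
   rewritten to the identity when gaps still have to be handled).  */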
1711 /* Check if the required load permutations in the SLP instance
1712 SLP_INSTN are supported. */
1714 static bool
1715 vect_supported_load_permutation_p (slp_instance slp_instn)
1717 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1718 unsigned int i, j, k, next;
1719 slp_tree node;
1720 gimple *next_load;
1722 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1725 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1726 if (node->load_permutation.exists ())
1727 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1728 dump_printf (MSG_NOTE, "%d ", next);
1729 else
1730 for (k = 0; k < group_size; ++k)
1731 dump_printf (MSG_NOTE, "%d ", k);
1732 dump_printf (MSG_NOTE, "\n");
1735 /* In case of reduction every load permutation is allowed, since the order
1736 of the reduction statements is not important (as opposed to the case of
1737 grouped stores). The only condition we need to check is that all the
1738 load nodes are of the same size and have the same permutation (and then
1739 rearrange all the nodes of the SLP instance according to this
1740 permutation). */
1742 /* Check that all the load nodes are of the same size. */
1743 /* ??? Can't we assert this? */
1744 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1745 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1746 return false;
1748 node = SLP_INSTANCE_TREE (slp_instn);
1749 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1751 /* Reduction (there are no data-refs in the root).
1752 In reduction chain the order of the loads is not important. */
1753 if (!STMT_VINFO_DATA_REF (stmt_info)
1754 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1755 vect_attempt_slp_rearrange_stmts (slp_instn);
1757 /* In basic block vectorization we allow any subchain of an interleaving
1758 chain.
1759 FORNOW: not supported in loop SLP because of realignment complications. */
1760 if (STMT_VINFO_BB_VINFO (stmt_info))
1762 /* Check whether the loads in an instance form a subchain and thus
1763 no permutation is necessary. */
1764 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1766 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1767 continue;
1768 bool subchain_p = true;
1769 next_load = NULL;
1770 stmt_vec_info load_info;
1771 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1773 if (j != 0
1774 && (next_load != load_info
1775 || DR_GROUP_GAP (load_info) != 1))
1777 subchain_p = false;
1778 break;
1780 next_load = DR_GROUP_NEXT_ELEMENT (load_info);
1782 if (subchain_p)
1783 SLP_TREE_LOAD_PERMUTATION (node).release ();
1784 else
1786 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1787 group_info
1788 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
1789 unsigned HOST_WIDE_INT nunits;
1790 unsigned k, maxk = 0;
1791 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1792 if (k > maxk)
1793 maxk = k;
1794 /* In BB vectorization we may not actually use a loaded vector
1795 accessing elements in excess of DR_GROUP_SIZE. */
1796 tree vectype = STMT_VINFO_VECTYPE (group_info);
1797 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1798 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "BB vectorization with gaps at the end of "
1802 "a load is not supported\n");
1803 return false;
1806 /* Verify the permutation can be generated. */
1807 vec<tree> tem;
1808 unsigned n_perms;
1809 if (!vect_transform_slp_perm_load (node, tem, NULL,
1810 1, slp_instn, true, &n_perms))
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1813 vect_location,
1814 "unsupported load permutation\n");
1815 return false;
1819 return true;
1822 /* For loop vectorization verify we can generate the permutation. Be
1823 conservative about the vectorization factor; there are permutations
1824 that will use three vector inputs only starting from a specific factor
1825 and the vectorization factor is not yet final.
1826 ??? The SLP instance unrolling factor might not be the maximum one. */
1827 unsigned n_perms;
1828 poly_uint64 test_vf
1829 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1830 LOOP_VINFO_VECT_FACTOR
1831 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1832 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1833 if (node->load_permutation.exists ()
1834 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1835 slp_instn, true, &n_perms))
1836 return false;
1838 return true;
1842 /* Find the last scalar stmt in the SLP node NODE. */
1844 gimple *
1845 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1847 gimple *last = NULL;
1848 stmt_vec_info stmt_vinfo;
1850 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1852 if (is_pattern_stmt_p (stmt_vinfo))
1853 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1854 else
1855 last = get_later_stmt (stmt_vinfo, last);
1858 return last;
1861 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1862 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1863 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1864 containing the remainder.
1865 Return the first stmt in the second group. */
1867 static gimple *
1868 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1870 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1871 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1872 gcc_assert (group1_size > 0);
1873 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1874 gcc_assert (group2_size > 0);
1875 DR_GROUP_SIZE (first_vinfo) = group1_size;
1877 gimple *stmt = first_stmt;
1878 for (unsigned i = group1_size; i > 1; i--)
1880 stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1881 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1883 /* STMT is now the last element of the first group. */
1884 gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1885 DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1887 DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1888 for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1890 DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1891 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1894 /* For the second group, the DR_GROUP_GAP is that before the original group,
1895 plus skipping over the first vector. */
1896 DR_GROUP_GAP (vinfo_for_stmt (group2))
1897 = DR_GROUP_GAP (first_vinfo) + group1_size;
1899 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1900 DR_GROUP_GAP (first_vinfo) += group2_size;
1902 if (dump_enabled_p ())
1903 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1904 group1_size, group2_size);
1906 return group2;
1909 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1910 statements and a vector of NUNITS elements. */
1912 static poly_uint64
1913 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1915 return exact_div (common_multiple (nunits, group_size), group_size);
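/* For example, with NUNITS == 4 and GROUP_SIZE == 6 the common multiple
   is 12 and the unrolling factor is 12 / 6 == 2: two scalar iterations
   of the group are needed to fill whole vectors.  */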
1918 /* Analyze an SLP instance starting from a group of grouped stores. Call
1919 vect_build_slp_tree to build a tree of packed stmts if possible.
1920 Return FALSE if it's impossible to SLP any stmt in the loop. */
1922 static bool
1923 vect_analyze_slp_instance (vec_info *vinfo,
1924 gimple *stmt, unsigned max_tree_size)
1926 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1927 slp_instance new_instance;
1928 slp_tree node;
1929 unsigned int group_size;
1930 tree vectype, scalar_type = NULL_TREE;
1931 gimple *next;
1932 stmt_vec_info next_info;
1933 unsigned int i;
1934 vec<slp_tree> loads;
1935 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1936 vec<stmt_vec_info> scalar_stmts;
1938 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1940 scalar_type = TREE_TYPE (DR_REF (dr));
1941 vectype = get_vectype_for_scalar_type (scalar_type);
1942 group_size = DR_GROUP_SIZE (stmt_info);
1944 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1946 gcc_assert (is_a <loop_vec_info> (vinfo));
1947 vectype = STMT_VINFO_VECTYPE (stmt_info);
1948 group_size = REDUC_GROUP_SIZE (stmt_info);
1950 else
1952 gcc_assert (is_a <loop_vec_info> (vinfo));
1953 vectype = STMT_VINFO_VECTYPE (stmt_info);
1954 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1957 if (!vectype)
1959 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1962 "Build SLP failed: unsupported data-type ");
1963 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1964 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1967 return false;
1969 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1971 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1972 scalar_stmts.create (group_size);
1973 next = stmt;
1974 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1976 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1977 while (next)
1979 next_info = vinfo_for_stmt (next);
1980 if (STMT_VINFO_IN_PATTERN_P (next_info)
1981 && STMT_VINFO_RELATED_STMT (next_info))
1982 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
1983 else
1984 scalar_stmts.safe_push (next_info);
1985 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1988 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1990 /* Collect the reduction stmts and store them in
1991 SLP_TREE_SCALAR_STMTS. */
1992 while (next)
1994 next_info = vinfo_for_stmt (next);
1995 if (STMT_VINFO_IN_PATTERN_P (next_info)
1996 && STMT_VINFO_RELATED_STMT (next_info))
1997 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
1998 else
1999 scalar_stmts.safe_push (next_info);
2000 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2002 /* Mark the first element of the reduction chain as reduction to properly
2003 transform the node. In the reduction analysis phase only the last
2004 element of the chain is marked as reduction. */
2005 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2007 else
2009 /* Collect reduction statements. */
2010 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2011 for (i = 0; reductions.iterate (i, &next_info); i++)
2012 scalar_stmts.safe_push (next_info);
2015 loads.create (group_size);
2017 /* Build the tree for the SLP instance. */
2018 bool *matches = XALLOCAVEC (bool, group_size);
2019 unsigned npermutes = 0;
2020 bst_fail = new scalar_stmts_set_t ();
2021 poly_uint64 max_nunits = nunits;
2022 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2023 &max_nunits, &loads, matches, &npermutes,
2024 NULL, max_tree_size);
2025 delete bst_fail;
2026 if (node != NULL)
2028 /* Calculate the unrolling factor based on the smallest type. */
2029 poly_uint64 unrolling_factor
2030 = calculate_unrolling_factor (max_nunits, group_size);
2032 if (maybe_ne (unrolling_factor, 1U)
2033 && is_a <bb_vec_info> (vinfo))
2035 unsigned HOST_WIDE_INT const_max_nunits;
2036 if (!max_nunits.is_constant (&const_max_nunits)
2037 || const_max_nunits > group_size)
2039 if (dump_enabled_p ())
2040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2041 "Build SLP failed: store group "
2042 "size not a multiple of the vector size "
2043 "in basic block SLP\n");
2044 vect_free_slp_tree (node, false);
2045 loads.release ();
2046 return false;
2048 /* Fatal mismatch. */
2049 matches[group_size / const_max_nunits * const_max_nunits] = false;
2050 vect_free_slp_tree (node, false);
2051 loads.release ();
2053 else
2055 /* Create a new SLP instance. */
2056 new_instance = XNEW (struct _slp_instance);
2057 SLP_INSTANCE_TREE (new_instance) = node;
2058 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2059 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2060 SLP_INSTANCE_LOADS (new_instance) = loads;
2062 /* Compute the load permutation. */
2063 slp_tree load_node;
2064 bool loads_permuted = false;
2065 FOR_EACH_VEC_ELT (loads, i, load_node)
2067 vec<unsigned> load_permutation;
2068 int j;
2069 stmt_vec_info load_info;
2070 gimple *first_stmt;
2071 bool this_load_permuted = false;
2072 load_permutation.create (group_size);
2073 first_stmt = DR_GROUP_FIRST_ELEMENT
2074 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2075 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2077 int load_place = vect_get_place_in_interleaving_chain
2078 (load_info, first_stmt);
2079 gcc_assert (load_place != -1);
2080 if (load_place != j)
2081 this_load_permuted = true;
2082 load_permutation.safe_push (load_place);
2084 if (!this_load_permuted
2085 /* The load requires permutation when unrolling exposes
2086 a gap either because the group is larger than the SLP
2087 group-size or because there is a gap between the groups. */
2088 && (known_eq (unrolling_factor, 1U)
2089 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2090 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2092 load_permutation.release ();
2093 continue;
2095 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2096 loads_permuted = true;
2099 if (loads_permuted)
2101 if (!vect_supported_load_permutation_p (new_instance))
2103 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2106 "Build SLP failed: unsupported load "
2107 "permutation ");
2108 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2109 TDF_SLIM, stmt, 0);
2111 vect_free_slp_instance (new_instance, false);
2112 return false;
 2116 /* If the loads and stores can be handled with load/store-lanes
2117 instructions do not generate this SLP instance. */
2118 if (is_a <loop_vec_info> (vinfo)
2119 && loads_permuted
2120 && dr && vect_store_lanes_supported (vectype, group_size, false))
2122 slp_tree load_node;
2123 FOR_EACH_VEC_ELT (loads, i, load_node)
2125 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
2126 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2127 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2128 /* Use SLP for strided accesses (or if we
2129 can't load-lanes). */
2130 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2131 || ! vect_load_lanes_supported
2132 (STMT_VINFO_VECTYPE (stmt_vinfo),
2133 DR_GROUP_SIZE (stmt_vinfo), false))
2134 break;
2136 if (i == loads.length ())
2138 if (dump_enabled_p ())
2139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2140 "Built SLP cancelled: can use "
2141 "load/store-lanes\n");
2142 vect_free_slp_instance (new_instance, false);
2143 return false;
2147 vinfo->slp_instances.safe_push (new_instance);
2149 if (dump_enabled_p ())
2151 dump_printf_loc (MSG_NOTE, vect_location,
2152 "Final SLP tree for instance:\n");
2153 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2156 return true;
2159 else
2161 /* Failed to SLP. */
2162 /* Free the allocated memory. */
2163 scalar_stmts.release ();
2164 loads.release ();
2167 /* For basic block SLP, try to break the group up into multiples of the
2168 vector size. */
2169 unsigned HOST_WIDE_INT const_nunits;
2170 if (is_a <bb_vec_info> (vinfo)
2171 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2172 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2173 && nunits.is_constant (&const_nunits))
2175 /* We consider breaking the group only on VF boundaries from the existing
2176 start. */
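      /* For example, with GROUP_SIZE == 9, const_nunits == 4 and the
	 first mismatch at i == 6, group1_size becomes 6 & ~3 == 4: we
	 split off and re-analyze stmts 0..3, skip the partially
	 matching vector 4..7 and recurse on the remaining stmt 8.  */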
2177 for (i = 0; i < group_size; i++)
2178 if (!matches[i]) break;
2180 if (i >= const_nunits && i < group_size)
2182 /* Split into two groups at the first vector boundary before i. */
2183 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2184 unsigned group1_size = i & ~(const_nunits - 1);
2186 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2187 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2188 /* If the first non-match was in the middle of a vector,
2189 skip the rest of that vector. */
2190 if (group1_size < i)
2192 i = group1_size + const_nunits;
2193 if (i < group_size)
2194 rest = vect_split_slp_store_group (rest, const_nunits);
2196 if (i < group_size)
2197 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2198 return res;
2200 /* Even though the first vector did not all match, we might be able to SLP
 2201 some of the remainder. FORNOW ignore this possibility. */
2204 return false;
 2208 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2209 trees of packed scalar stmts if SLP is possible. */
2211 bool
2212 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2214 unsigned int i;
2215 gimple *first_element;
2217 DUMP_VECT_SCOPE ("vect_analyze_slp");
2219 /* Find SLP sequences starting from groups of grouped stores. */
2220 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2221 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2223 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2225 if (loop_vinfo->reduction_chains.length () > 0)
2227 /* Find SLP sequences starting from reduction chains. */
2228 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2229 if (! vect_analyze_slp_instance (vinfo, first_element,
2230 max_tree_size))
2232 /* Dissolve reduction chain group. */
2233 gimple *next, *stmt = first_element;
2234 while (stmt)
2236 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2237 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2238 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2239 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2240 stmt = next;
2242 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2243 = vect_internal_def;
2247 /* Find SLP sequences starting from groups of reductions. */
2248 if (loop_vinfo->reductions.length () > 1)
2249 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2250 max_tree_size);
2253 return true;
2257 /* For each possible SLP instance decide whether to SLP it and calculate overall
2258 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2259 least one instance. */
2261 bool
2262 vect_make_slp_decision (loop_vec_info loop_vinfo)
2264 unsigned int i;
2265 poly_uint64 unrolling_factor = 1;
2266 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2267 slp_instance instance;
2268 int decided_to_slp = 0;
2270 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2272 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2274 /* FORNOW: SLP if you can. */
2275 /* All unroll factors have the form current_vector_size * X for some
2276 rational X, so they must have a common multiple. */
2277 unrolling_factor
2278 = force_common_multiple (unrolling_factor,
2279 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2281 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2282 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2283 loop-based vectorization. Such stmts will be marked as HYBRID. */
2284 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2285 decided_to_slp++;
2288 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2290 if (decided_to_slp && dump_enabled_p ())
2292 dump_printf_loc (MSG_NOTE, vect_location,
2293 "Decided to SLP %d instances. Unrolling factor ",
2294 decided_to_slp);
2295 dump_dec (MSG_NOTE, unrolling_factor);
2296 dump_printf (MSG_NOTE, "\n");
2299 return (decided_to_slp > 0);
2303 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2304 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
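/* For example, if a stmt in an SLP instance also defines a value that
   is consumed by a stmt which can only be loop-vectorized, that stmt
   must be vectorized both ways and is therefore marked hybrid.  */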
2306 static void
2307 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2309 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2310 imm_use_iterator imm_iter;
2311 gimple *use_stmt;
2312 stmt_vec_info use_vinfo;
2313 slp_tree child;
2314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2315 int j;
2317 /* Propagate hybrid down the SLP tree. */
2318 if (stype == hybrid)
2320 else if (HYBRID_SLP_STMT (stmt_vinfo))
2321 stype = hybrid;
2322 else
2324 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2325 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2326 /* If we get a pattern stmt here we have to use the LHS of the
2327 original stmt for immediate uses. */
2328 gimple *stmt = stmt_vinfo->stmt;
2329 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2330 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2331 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo)->stmt;
2332 tree def;
2333 if (gimple_code (stmt) == GIMPLE_PHI)
2334 def = gimple_phi_result (stmt);
2335 else
2336 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2337 if (def)
2338 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2340 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2341 if (!use_vinfo)
2342 continue;
2343 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2344 && STMT_VINFO_RELATED_STMT (use_vinfo))
2345 use_vinfo = STMT_VINFO_RELATED_STMT (use_vinfo);
2346 if (!STMT_SLP_TYPE (use_vinfo)
2347 && (STMT_VINFO_RELEVANT (use_vinfo)
2348 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2349 && !(gimple_code (use_stmt) == GIMPLE_PHI
2350 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2352 if (dump_enabled_p ())
2354 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2355 "def in non-SLP stmt: ");
2356 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2358 stype = hybrid;
2363 if (stype == hybrid
2364 && !HYBRID_SLP_STMT (stmt_vinfo))
2366 if (dump_enabled_p ())
2368 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2369 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
2371 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2374 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2375 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2376 vect_detect_hybrid_slp_stmts (child, i, stype);
2379 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2381 static tree
2382 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2384 walk_stmt_info *wi = (walk_stmt_info *)data;
2385 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2387 if (wi->is_lhs)
2388 return NULL_TREE;
2390 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2391 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2393 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2396 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
2398 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2401 return NULL_TREE;
2404 static tree
2405 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2406 walk_stmt_info *wi)
2408 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2409 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2410 /* If the stmt is in a SLP instance then this isn't a reason
2411 to mark use definitions in other SLP instances as hybrid. */
2412 if (! STMT_SLP_TYPE (use_vinfo)
2413 && (STMT_VINFO_RELEVANT (use_vinfo)
2414 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2415 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2416 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2418 else
2419 *handled = true;
2420 return NULL_TREE;
2423 /* Find stmts that must be both vectorized and SLPed. */
2425 void
2426 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2428 unsigned int i;
2429 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2430 slp_instance instance;
2432 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
 2434 /* First walk all pattern stmts in the loop and mark defs of uses as
 2435 hybrid, because immediate uses within pattern stmts are not recorded. */
2436 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2438 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2439 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2440 gsi_next (&gsi))
2442 gimple *stmt = gsi_stmt (gsi);
2443 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2444 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2446 walk_stmt_info wi;
2447 memset (&wi, 0, sizeof (wi));
2448 wi.info = loop_vinfo;
2449 gimple_stmt_iterator gsi2
2450 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2451 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2452 vect_detect_hybrid_slp_1, &wi);
2453 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2454 vect_detect_hybrid_slp_2,
2455 vect_detect_hybrid_slp_1, &wi);
2460 /* Then walk the SLP instance trees marking stmts with uses in
2461 non-SLP stmts as hybrid, also propagating hybrid down the
2462 SLP tree, collecting the above info on-the-fly. */
2463 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2465 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2466 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2467 i, pure_slp);
2472 /* Initialize a bb_vec_info struct for the statements between
2473 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2475 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2476 gimple_stmt_iterator region_end_in,
2477 vec_info_shared *shared)
2478 : vec_info (vec_info::bb, init_cost (NULL), shared),
2479 bb (gsi_bb (region_begin_in)),
2480 region_begin (region_begin_in),
2481 region_end (region_end_in)
2483 gimple_stmt_iterator gsi;
2485 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2486 gsi_next (&gsi))
2488 gimple *stmt = gsi_stmt (gsi);
2489 gimple_set_uid (stmt, 0);
2490 add_stmt (stmt);
2493 bb->aux = this;
2497 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2498 stmts in the basic block. */
2500 _bb_vec_info::~_bb_vec_info ()
2502 for (gimple_stmt_iterator si = region_begin;
2503 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2505 gimple *stmt = gsi_stmt (si);
2506 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2508 if (stmt_info)
2509 /* Free stmt_vec_info. */
2510 free_stmt_vec_info (stmt);
2512 /* Reset region marker. */
2513 gimple_set_uid (stmt, -1);
2516 bb->aux = NULL;
2519 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
 2520 given that its child nodes have already been processed, and that
2521 their def types currently match their SLP node's def type. */
2523 static bool
2524 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2525 slp_instance node_instance,
2526 stmt_vector_for_cost *cost_vec)
2528 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2529 gimple *stmt = stmt_info->stmt;
2530 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2532 /* For BB vectorization vector types are assigned here.
2533 Memory accesses already got their vector type assigned
2534 in vect_analyze_data_refs. */
2535 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2536 if (bb_vinfo
2537 && ! STMT_VINFO_DATA_REF (stmt_info))
2539 tree vectype, nunits_vectype;
2540 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2541 &nunits_vectype))
2542 /* We checked this when building the node. */
2543 gcc_unreachable ();
2544 if (vectype == boolean_type_node)
2546 vectype = vect_get_mask_type_for_stmt (stmt_info);
2547 if (!vectype)
2548 /* vect_get_mask_type_for_stmt has already explained the
2549 failure. */
2550 return false;
2553 stmt_vec_info sstmt_info;
2554 unsigned int i;
2555 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2556 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2559 /* Calculate the number of vector statements to be created for the
2560 scalar stmts in this node. For SLP reductions it is equal to the
2561 number of vector statements in the children (which has already been
2562 calculated by the recursive call). Otherwise it is the number of
2563 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2564 VF divided by the number of elements in a vector. */
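  /* For example, with DR_GROUP_SIZE == 4, VF == 8 and 4-element vectors,
     4 * 8 / 4 == 8 vector statements are needed.  */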
2565 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2566 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2567 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2568 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2569 else
2571 poly_uint64 vf;
2572 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2573 vf = loop_vinfo->vectorization_factor;
2574 else
2575 vf = 1;
2576 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2577 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2578 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2579 = vect_get_num_vectors (vf * group_size, vectype);
2582 bool dummy;
2583 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2586 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2587 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2589 Return true if the operations are supported. */
2591 static bool
2592 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2593 slp_instance node_instance,
2594 scalar_stmts_to_slp_tree_map_t *visited,
2595 scalar_stmts_to_slp_tree_map_t *lvisited,
2596 stmt_vector_for_cost *cost_vec)
2598 int i, j;
2599 slp_tree child;
2601 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2602 return true;
2604 /* If we already analyzed the exact same set of scalar stmts we're done.
2605 We share the generated vector stmts for those. */
2606 slp_tree *leader;
2607 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2608 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2610 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2611 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2612 return true;
 2615 /* The SLP graph is acyclic, so not caching whether we failed or succeeded
 2616 doesn't result in any issue, since we throw away the lvisited set
 2617 when we fail. */
2618 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2620 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2621 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2622 visited, lvisited, cost_vec))
2623 return false;
2625 /* Push SLP node def-type to stmt operands. */
2626 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2627 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2628 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2629 = SLP_TREE_DEF_TYPE (child);
2630 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2631 cost_vec);
2632 /* Restore def-types. */
2633 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2634 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2635 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2636 = vect_internal_def;
2637 if (! res)
2638 return false;
2640 return true;
2644 /* Analyze statements in SLP instances of VINFO. Return true if the
2645 operations are supported. */
2647 bool
2648 vect_slp_analyze_operations (vec_info *vinfo)
2650 slp_instance instance;
2651 int i;
2653 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2655 scalar_stmts_to_slp_tree_map_t *visited
2656 = new scalar_stmts_to_slp_tree_map_t ();
2657 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2659 scalar_stmts_to_slp_tree_map_t lvisited;
2660 stmt_vector_for_cost cost_vec;
2661 cost_vec.create (2);
2662 if (!vect_slp_analyze_node_operations (vinfo,
2663 SLP_INSTANCE_TREE (instance),
2664 instance, visited, &lvisited,
2665 &cost_vec))
2667 slp_tree node = SLP_INSTANCE_TREE (instance);
2668 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2669 dump_printf_loc (MSG_NOTE, vect_location,
2670 "removing SLP instance operations starting from: ");
2671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
2672 vect_free_slp_instance (instance, false);
2673 vinfo->slp_instances.ordered_remove (i);
2674 cost_vec.release ();
2676 else
2678 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2679 x != lvisited.end(); ++x)
2680 visited->put ((*x).first.copy (), (*x).second);
2681 i++;
2683 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2684 cost_vec.release ();
2687 delete visited;
2689 return !vinfo->slp_instances.is_empty ();
 2693 /* Compute the scalar cost of the SLP node NODE and its children
 2694 and record it in COST_VEC. Do not account defs that are marked in LIFE
 2695 and update LIFE according to uses of NODE. */
2697 static void
2698 vect_bb_slp_scalar_cost (basic_block bb,
2699 slp_tree node, vec<bool, va_heap> *life,
2700 stmt_vector_for_cost *cost_vec)
2702 unsigned i;
2703 stmt_vec_info stmt_info;
2704 slp_tree child;
2706 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2708 gimple *stmt = stmt_info->stmt;
2709 ssa_op_iter op_iter;
2710 def_operand_p def_p;
2712 if ((*life)[i])
2713 continue;
2715 /* If there is a non-vectorized use of the defs then the scalar
2716 stmt is kept live in which case we do not account it or any
2717 required defs in the SLP children in the scalar cost. This
2718 way we make the vectorization more costly when compared to
2719 the scalar cost. */
2720 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2722 imm_use_iterator use_iter;
2723 gimple *use_stmt;
2724 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2725 if (!is_gimple_debug (use_stmt)
2726 && (! vect_stmt_in_region_p (stmt_info->vinfo, use_stmt)
2727 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2729 (*life)[i] = true;
2730 BREAK_FROM_IMM_USE_STMT (use_iter);
2733 if ((*life)[i])
2734 continue;
2736 /* Count scalar stmts only once. */
2737 if (gimple_visited_p (stmt))
2738 continue;
2739 gimple_set_visited (stmt, true);
2741 vect_cost_for_stmt kind;
2742 if (STMT_VINFO_DATA_REF (stmt_info))
2744 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2745 kind = scalar_load;
2746 else
2747 kind = scalar_store;
2749 else
2750 kind = scalar_stmt;
2751 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2754 auto_vec<bool, 20> subtree_life;
2755 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2757 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2759 /* Do not directly pass LIFE to the recursive call, copy it to
2760 confine changes in the callee to the current child/subtree. */
2761 subtree_life.safe_splice (*life);
2762 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2763 subtree_life.truncate (0);
2768 /* Check if vectorization of the basic block is profitable. */
2770 static bool
2771 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2773 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2774 slp_instance instance;
2775 int i;
2776 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2777 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2779 /* Calculate scalar cost. */
2780 stmt_vector_for_cost scalar_costs;
2781 scalar_costs.create (0);
2782 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2784 auto_vec<bool, 20> life;
2785 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2786 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2787 SLP_INSTANCE_TREE (instance),
2788 &life, &scalar_costs);
2790 void *target_cost_data = init_cost (NULL);
2791 add_stmt_costs (target_cost_data, &scalar_costs);
2792 scalar_costs.release ();
2793 unsigned dummy;
2794 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2795 destroy_cost_data (target_cost_data);
2797 /* Unset visited flag. */
2798 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2799 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2800 gimple_set_visited (gsi_stmt (gsi), false);
2802 /* Complete the target-specific cost calculation. */
2803 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2804 &vec_inside_cost, &vec_epilogue_cost);
2806 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2808 if (dump_enabled_p ())
2810 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2811 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2812 vec_inside_cost);
2813 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2814 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2815 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
 2818 /* Vectorization is profitable if its cost is no more than the cost of the
 2819 scalar version. Note that we err on the vector side for equal cost because
2820 the cost estimate is otherwise quite pessimistic (constant uses are
2821 free on the scalar side but cost a load on the vector side for
2822 example). */
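  /* E.g. with vec_outside_cost == 2, vec_inside_cost == 8 and
     scalar_cost == 10 the costs tie and we still vectorize.  */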
2823 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2824 return false;
2826 return true;
2829 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2830 if so and sets fatal to true if failure is independent of
2831 current_vector_size. */
2833 static bb_vec_info
2834 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2835 gimple_stmt_iterator region_end,
2836 vec<data_reference_p> datarefs, int n_stmts,
2837 bool &fatal, vec_info_shared *shared)
2839 bb_vec_info bb_vinfo;
2840 slp_instance instance;
2841 int i;
2842 poly_uint64 min_vf = 2;
2844 /* The first group of checks is independent of the vector size. */
2845 fatal = true;
2847 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2851 "not vectorized: too many instructions in "
2852 "basic block.\n");
2853 free_data_refs (datarefs);
2854 return NULL;
2857 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2858 if (!bb_vinfo)
2859 return NULL;
2861 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2862 bb_vinfo->shared->save_datarefs ();
2864 /* Analyze the data references. */
2866 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2868 if (dump_enabled_p ())
2869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2870 "not vectorized: unhandled data-ref in basic "
2871 "block.\n");
2873 delete bb_vinfo;
2874 return NULL;
2877 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2879 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2881 "not vectorized: not enough data-refs in "
2882 "basic block.\n");
2884 delete bb_vinfo;
2885 return NULL;
2888 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2890 if (dump_enabled_p ())
2891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2892 "not vectorized: unhandled data access in "
2893 "basic block.\n");
2895 delete bb_vinfo;
2896 return NULL;
2899 /* If there are no grouped stores in the region there is no need
2900 to continue with pattern recog as vect_analyze_slp will fail
2901 anyway. */
2902 if (bb_vinfo->grouped_stores.is_empty ())
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2906 "not vectorized: no grouped stores in "
2907 "basic block.\n");
2909 delete bb_vinfo;
2910 return NULL;
 2913 /* The rest of the analysis below depends on the vector size in some way. */
2914 fatal = false;
2916 vect_pattern_recog (bb_vinfo);
2918 /* Check the SLP opportunities in the basic block, analyze and build SLP
2919 trees. */
2920 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2922 if (dump_enabled_p ())
2924 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2925 "Failed to SLP the basic block.\n");
2926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2927 "not vectorized: failed to find SLP opportunities "
2928 "in basic block.\n");
2931 delete bb_vinfo;
2932 return NULL;
2935 vect_record_base_alignments (bb_vinfo);
2937 /* Analyze and verify the alignment of data references and the
2938 dependence in the SLP instances. */
2939 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2941 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2942 || ! vect_slp_analyze_instance_dependence (instance))
2944 slp_tree node = SLP_INSTANCE_TREE (instance);
2945 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2946 dump_printf_loc (MSG_NOTE, vect_location,
2947 "removing SLP instance operations starting from: ");
2948 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
2949 vect_free_slp_instance (instance, false);
2950 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2951 continue;
2954 /* Mark all the statements that we want to vectorize as pure SLP and
2955 relevant. */
2956 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2957 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2959 i++;
2961 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2963 delete bb_vinfo;
2964 return NULL;
2967 if (!vect_slp_analyze_operations (bb_vinfo))
2969 if (dump_enabled_p ())
2970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2971 "not vectorized: bad operation in basic block.\n");
2973 delete bb_vinfo;
2974 return NULL;
2977 /* Cost model: check if the vectorization is worthwhile. */
2978 if (!unlimited_cost_model (NULL)
2979 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2981 if (dump_enabled_p ())
2982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2983 "not vectorized: vectorization is not "
2984 "profitable.\n");
2986 delete bb_vinfo;
2987 return NULL;
2990 if (dump_enabled_p ())
2991 dump_printf_loc (MSG_NOTE, vect_location,
2992 "Basic block will be vectorized using SLP\n");
2994 return bb_vinfo;
2998 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2999 true if anything in the basic-block was vectorized. */
3001 bool
3002 vect_slp_bb (basic_block bb)
3004 bb_vec_info bb_vinfo;
3005 gimple_stmt_iterator gsi;
3006 bool any_vectorized = false;
3007 auto_vector_sizes vector_sizes;
3009 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3011 /* Autodetect first vector size we try. */
3012 current_vector_size = 0;
3013 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3014 unsigned int next_size = 0;
3016 gsi = gsi_start_bb (bb);
3018 poly_uint64 autodetected_vector_size = 0;
3019 while (1)
3021 if (gsi_end_p (gsi))
3022 break;
3024 gimple_stmt_iterator region_begin = gsi;
3025 vec<data_reference_p> datarefs = vNULL;
3026 int insns = 0;
3028 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3030 gimple *stmt = gsi_stmt (gsi);
3031 if (is_gimple_debug (stmt))
3032 continue;
3033 insns++;
3035 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3036 vect_location = stmt;
3038 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3039 break;
3042 /* Skip leading unhandled stmts. */
3043 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3045 gsi_next (&gsi);
3046 continue;
3049 gimple_stmt_iterator region_end = gsi;
3051 bool vectorized = false;
3052 bool fatal = false;
3053 vec_info_shared shared;
3054 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3055 datarefs, insns, fatal, &shared);
3056 if (bb_vinfo
3057 && dbg_cnt (vect_slp))
3059 if (dump_enabled_p ())
3060 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3062 bb_vinfo->shared->check_datarefs ();
3063 vect_schedule_slp (bb_vinfo);
3065 unsigned HOST_WIDE_INT bytes;
3066 if (current_vector_size.is_constant (&bytes))
3067 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3068 "basic block part vectorized using "
3069 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3070 "vectors\n", bytes);
3071 else
3072 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3073 "basic block part vectorized using variable "
3074 "length vectors\n");
3076 vectorized = true;
3078 delete bb_vinfo;
3080 any_vectorized |= vectorized;
3082 if (next_size == 0)
3083 autodetected_vector_size = current_vector_size;
3085 if (next_size < vector_sizes.length ()
3086 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3087 next_size += 1;
3089 if (vectorized
3090 || next_size == vector_sizes.length ()
3091 || known_eq (current_vector_size, 0U)
3092 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3093 vector sizes will fail do not bother iterating. */
3094 || fatal)
3096 if (gsi_end_p (region_end))
3097 break;
3099 /* Skip the unhandled stmt. */
3100 gsi_next (&gsi);
3102 /* And reset vector sizes. */
3103 current_vector_size = 0;
3104 next_size = 0;
3106 else
3108 /* Try the next biggest vector size. */
3109 current_vector_size = vector_sizes[next_size++];
3110 if (dump_enabled_p ())
3112 dump_printf_loc (MSG_NOTE, vect_location,
3113 "***** Re-trying analysis with "
3114 "vector size ");
3115 dump_dec (MSG_NOTE, current_vector_size);
3116 dump_printf (MSG_NOTE, "\n");
3119 /* Start over. */
3120 gsi = region_begin;
3124 return any_vectorized;
 3128 /* Return true if the vector type for the boolean constant that is operand
 3129 OPNUM of statement STMT is a boolean vector. */
3131 static bool
3132 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3134 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3135 enum tree_code code = gimple_expr_code (stmt);
3136 tree op, vectype;
3137 enum vect_def_type dt;
3139 /* For comparison and COND_EXPR type is chosen depending
3140 on the other comparison operand. */
3141 if (TREE_CODE_CLASS (code) == tcc_comparison)
3143 if (opnum)
3144 op = gimple_assign_rhs1 (stmt);
3145 else
3146 op = gimple_assign_rhs2 (stmt);
3148 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3149 gcc_unreachable ();
3151 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3154 if (code == COND_EXPR)
3156 tree cond = gimple_assign_rhs1 (stmt);
3158 if (TREE_CODE (cond) == SSA_NAME)
3159 op = cond;
3160 else if (opnum)
3161 op = TREE_OPERAND (cond, 1);
3162 else
3163 op = TREE_OPERAND (cond, 0);
3165 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3166 gcc_unreachable ();
3168 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3171 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3174 /* Build a variable-length vector in which the elements in ELTS are repeated
 3175 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3176 RESULTS and add any new instructions to SEQ.
3178 The approach we use is:
3180 (1) Find a vector mode VM with integer elements of mode IM.
3182 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3183 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3184 from small vectors to IM.
3186 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3188 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3189 correct byte contents.
3191 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3193 We try to find the largest IM for which this sequence works, in order
3194 to cut down on the number of interleaves. */
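/* As an illustration (assuming a target that provides the modes used):
   for ELTS = { a, b, c, d } of HImode and NVECTORS == 2, step (2) view
   converts { a, b } and { c, d } to two SImode values AB and CD.  Step
   (3) duplicates them into { AB, AB, ... } and { CD, CD, ... }, step
   (4) interleaves these to { AB, CD, AB, CD, ... }, whose bytes read
   a, b, c, d, a, b, c, d, ..., and step (5) view converts the result
   back to the requested HImode vector type.  */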
3196 void
3197 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3198 unsigned int nresults, vec<tree> &results)
3200 unsigned int nelts = elts.length ();
3201 tree element_type = TREE_TYPE (vector_type);
3203 /* (1) Find a vector mode VM with integer elements of mode IM. */
3204 unsigned int nvectors = 1;
3205 tree new_vector_type;
3206 tree permutes[2];
3207 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3208 &nvectors, &new_vector_type,
3209 permutes))
3210 gcc_unreachable ();
3212 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3213 unsigned int partial_nelts = nelts / nvectors;
3214 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3216 tree_vector_builder partial_elts;
3217 auto_vec<tree, 32> pieces (nvectors * 2);
3218 pieces.quick_grow (nvectors * 2);
3219 for (unsigned int i = 0; i < nvectors; ++i)
3221 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3222 ELTS' has mode IM. */
3223 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3224 for (unsigned int j = 0; j < partial_nelts; ++j)
3225 partial_elts.quick_push (elts[i * partial_nelts + j]);
3226 tree t = gimple_build_vector (seq, &partial_elts);
3227 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3228 TREE_TYPE (new_vector_type), t);
3230 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3231 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3234 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3235 correct byte contents.
3237 We need to repeat the following operation log2(nvectors) times:
3239 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3240 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3242 However, if each input repeats every N elements and the VF is
3243 a multiple of N * 2, the HI result is the same as the LO. */
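  /* For instance, once an input already repeats { AB, CD, AB, CD, ... }
     every two elements and the vector has a multiple of four elements,
     interleaving its low and high halves produces identical vectors,
     so the HI output can simply reuse the LO one.  */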
3244 unsigned int in_start = 0;
3245 unsigned int out_start = nvectors;
3246 unsigned int hi_start = nvectors / 2;
3247 /* A bound on the number of outputs needed to produce NRESULTS results
3248 in the final iteration. */
3249 unsigned int noutputs_bound = nvectors * nresults;
3250 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3252 noutputs_bound /= 2;
3253 unsigned int limit = MIN (noutputs_bound, nvectors);
3254 for (unsigned int i = 0; i < limit; ++i)
3256 if ((i & 1) != 0
3257 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3258 2 * in_repeat))
3260 pieces[out_start + i] = pieces[out_start + i - 1];
3261 continue;
3264 tree output = make_ssa_name (new_vector_type);
3265 tree input1 = pieces[in_start + (i / 2)];
3266 tree input2 = pieces[in_start + (i / 2) + hi_start];
3267 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3268 input1, input2,
3269 permutes[i & 1]);
3270 gimple_seq_add_stmt (seq, stmt);
3271 pieces[out_start + i] = output;
3273 std::swap (in_start, out_start);
3276 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3277 results.reserve (nresults);
3278 for (unsigned int i = 0; i < nresults; ++i)
3279 if (i < nvectors)
3280 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3281 pieces[in_start + i]));
3282 else
3283 results.quick_push (results[i - nvectors]);
3287 /* For constant and loop invariant defs of SLP_NODE this function returns
3288 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3289 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
 3290 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3294 static void
3295 vect_get_constant_vectors (tree op, slp_tree slp_node,
3296 vec<tree> *vec_oprnds,
3297 unsigned int op_num, unsigned int number_of_vectors)
3299 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3300 stmt_vec_info stmt_vinfo = stmts[0];
3301 gimple *stmt = stmt_vinfo->stmt;
3302 unsigned HOST_WIDE_INT nunits;
3303 tree vec_cst;
3304 unsigned j, number_of_places_left_in_vector;
3305 tree vector_type;
3306 tree vop;
3307 int group_size = stmts.length ();
3308 unsigned int vec_num, i;
3309 unsigned number_of_copies = 1;
3310 vec<tree> voprnds;
3311 voprnds.create (number_of_vectors);
3312 bool constant_p, is_store;
3313 tree neutral_op = NULL;
3314 enum tree_code code = gimple_expr_code (stmt);
3315 gimple_seq ctor_seq = NULL;
3316 auto_vec<tree, 16> permute_results;
3318 /* Check if vector type is a boolean vector. */
3319 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3320 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3321 vector_type
3322 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3323 else
3324 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3326 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3328 is_store = true;
3329 op = gimple_assign_rhs1 (stmt);
3331 else
3332 is_store = false;
3334 gcc_assert (op);
3336 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3337 created vectors. It is greater than 1 if unrolling is performed.
3339 For example, we have two scalar operands, s1 and s2 (e.g., group of
3340 strided accesses of size two), while NUNITS is four (i.e., four scalars
3341 of this type can be packed in a vector). The output vector will contain
3342 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3343 will be 2).
3345 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3346 containing the operands.
3348 For example, NUNITS is four as before, and the group size is 8
3349 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3350 {s5, s6, s7, s8}. */
3352 /* When using duplicate_and_interleave, we just need one element for
3353 each scalar statement. */
3354 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3355 nunits = group_size;
3357 number_of_copies = nunits * number_of_vectors / group_size;
3359 number_of_places_left_in_vector = nunits;
3360 constant_p = true;
3361 tree_vector_builder elts (vector_type, nunits, 1);
3362 elts.quick_grow (nunits);
3363 bool place_after_defs = false;
3364 for (j = 0; j < number_of_copies; j++)
3366 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3368 stmt = stmt_vinfo->stmt;
3369 if (is_store)
3370 op = gimple_assign_rhs1 (stmt);
3371 else
3373 switch (code)
3375 case COND_EXPR:
3377 tree cond = gimple_assign_rhs1 (stmt);
3378 if (TREE_CODE (cond) == SSA_NAME)
3379 op = gimple_op (stmt, op_num + 1);
3380 else if (op_num == 0 || op_num == 1)
3381 op = TREE_OPERAND (cond, op_num);
3382 else
3384 if (op_num == 2)
3385 op = gimple_assign_rhs2 (stmt);
3386 else
3387 op = gimple_assign_rhs3 (stmt);
3390 break;
3392 case CALL_EXPR:
3393 op = gimple_call_arg (stmt, op_num);
3394 break;
3396 case LSHIFT_EXPR:
3397 case RSHIFT_EXPR:
3398 case LROTATE_EXPR:
3399 case RROTATE_EXPR:
3400 op = gimple_op (stmt, op_num + 1);
 3401 /* Unlike the other binary operators, shifts/rotates take an int
 3402 shift count instead of one with the same type as the lhs, so
 3403 make sure the scalar has the right type if we are dealing with
 3404 vectors of long long/long/short/char. */
3406 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3407 op = fold_convert (TREE_TYPE (vector_type), op);
3408 break;
3410 default:
3411 op = gimple_op (stmt, op_num + 1);
3412 break;
3416 /* Create 'vect_ = {op0,op1,...,opn}'. */
3417 number_of_places_left_in_vector--;
3418 tree orig_op = op;
3419 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3421 if (CONSTANT_CLASS_P (op))
3423 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3425 /* Can't use VIEW_CONVERT_EXPR for booleans because
3426 of possibly different sizes of scalar value and
3427 vector element. */
3428 if (integer_zerop (op))
3429 op = build_int_cst (TREE_TYPE (vector_type), 0);
3430 else if (integer_onep (op))
3431 op = build_all_ones_cst (TREE_TYPE (vector_type));
3432 else
3433 gcc_unreachable ();
3435 else
3436 op = fold_unary (VIEW_CONVERT_EXPR,
3437 TREE_TYPE (vector_type), op);
3438 gcc_assert (op && CONSTANT_CLASS_P (op));
3440 else
3442 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3443 gimple *init_stmt;
3444 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3446 tree true_val
3447 = build_all_ones_cst (TREE_TYPE (vector_type));
3448 tree false_val
3449 = build_zero_cst (TREE_TYPE (vector_type));
3450 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3451 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3452 op, true_val,
3453 false_val);
3455 else
3457 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3458 op);
3459 init_stmt
3460 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3461 op);
3463 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3464 op = new_temp;
3467 elts[number_of_places_left_in_vector] = op;
3468 if (!CONSTANT_CLASS_P (op))
3469 constant_p = false;
3470 if (TREE_CODE (orig_op) == SSA_NAME
3471 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3472 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3473 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3474 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3475 place_after_defs = true;
3477 if (number_of_places_left_in_vector == 0)
3479 if (constant_p
3480 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3481 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3482 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3483 else
3485 if (vec_oprnds->is_empty ())
3486 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3487 number_of_vectors,
3488 permute_results);
3489 vec_cst = permute_results[number_of_vectors - j - 1];
3491 tree init;
3492 gimple_stmt_iterator gsi;
3493 if (place_after_defs)
3495 gsi = gsi_for_stmt
3496 (vect_find_last_scalar_stmt_in_slp (slp_node));
3497 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3498 &gsi);
3500 else
3501 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3502 NULL);
3503 if (ctor_seq != NULL)
3505 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3506 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3507 ctor_seq = NULL;
3509 voprnds.quick_push (init);
3510 place_after_defs = false;
3511 number_of_places_left_in_vector = nunits;
3512 constant_p = true;
3513 elts.new_vector (vector_type, nunits, 1);
3514 elts.quick_grow (nunits);
3519 /* Since the vectors are created in the reverse order, we should invert
3520 them. */
3521 vec_num = voprnds.length ();
3522 for (j = vec_num; j != 0; j--)
3524 vop = voprnds[j - 1];
3525 vec_oprnds->quick_push (vop);
3528 voprnds.release ();
 3530 /* If the VF is greater than the unrolling factor needed for the SLP
3531 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3532 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3533 to replicate the vectors. */
3534 while (number_of_vectors > vec_oprnds->length ())
3536 tree neutral_vec = NULL;
3538 if (neutral_op)
3540 if (!neutral_vec)
3541 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3543 vec_oprnds->quick_push (neutral_vec);
3545 else
3547 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3548 vec_oprnds->quick_push (vop);
 3554 /* Get the vectorized definitions from SLP_NODE, which already contains
 3555 the corresponding vectorized def-stmts. */
3557 static void
3558 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3560 tree vec_oprnd;
3561 stmt_vec_info vec_def_stmt_info;
3562 unsigned int i;
3564 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3566 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3568 gcc_assert (vec_def_stmt_info);
3569 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3570 vec_oprnd = gimple_phi_result (vec_def_phi);
3571 else
3572 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3573 vec_oprnds->quick_push (vec_oprnd);
3578 /* Get vectorized definitions for SLP_NODE.
3579 If the scalar definitions are loop invariants or constants, collect them and
3580 call vect_get_constant_vectors() to create vector stmts.
3581 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3582 must be stored in the corresponding child of SLP_NODE, and we call
3583 vect_get_slp_vect_defs () to retrieve them. */
3585 void
3586 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3587 vec<vec<tree> > *vec_oprnds)
3589 gimple *first_stmt;
3590 int number_of_vects = 0, i;
3591 unsigned int child_index = 0;
3592 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3593 slp_tree child = NULL;
3594 vec<tree> vec_defs;
3595 tree oprnd;
3596 bool vectorized_defs;
3598 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3599 FOR_EACH_VEC_ELT (ops, i, oprnd)
3601 /* For each operand we check if it has vectorized definitions in a child
3602 node or we need to create them (for invariants and constants). We
3603 check if the LHS of the first stmt of the next child matches OPRND.
3604 If it does, we found the correct child. Otherwise, we call
 3605 vect_get_constant_vectors (), and do not advance CHILD_INDEX so that
 3606 this child node is checked again for the next operand. */
3607 vectorized_defs = false;
3608 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3610 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3612 /* We have to check both pattern and original def, if available. */
3613 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3615 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3616 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3617 tree first_def_op;
3619 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3620 first_def_op = gimple_phi_result (first_def);
3621 else
3622 first_def_op = gimple_get_lhs (first_def_info->stmt);
3623 if (operand_equal_p (oprnd, first_def_op, 0)
3624 || (related
3625 && operand_equal_p (oprnd,
3626 gimple_get_lhs (related->stmt), 0)))
3628 /* The number of vector defs is determined by the number of
3629 vector statements in the node from which we get those
3630 statements. */
3631 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3632 vectorized_defs = true;
3633 child_index++;
3636 else
3637 child_index++;
3640 if (!vectorized_defs)
3642 if (i == 0)
3644 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3645 /* Number of vector stmts was calculated according to LHS in
3646 vect_schedule_slp_instance (), fix it by replacing LHS with
3647 RHS, if necessary. See vect_get_smallest_scalar_type () for
3648 details. */
3649 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3650 &rhs_size_unit);
3651 if (rhs_size_unit != lhs_size_unit)
3653 number_of_vects *= rhs_size_unit;
3654 number_of_vects /= lhs_size_unit;
3659 /* Allocate memory for vectorized defs. */
3660 vec_defs = vNULL;
3661 vec_defs.create (number_of_vects);
3663 /* For reduction defs we call vect_get_constant_vectors (), since we are
3664 looking for initial loop invariant values. */
3665 if (vectorized_defs)
3666 /* The defs are already vectorized. */
3667 vect_get_slp_vect_defs (child, &vec_defs);
3668 else
3669 /* Build vectors from scalar defs. */
3670 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3671 number_of_vects);
3673 vec_oprnds->quick_push (vec_defs);
3677 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3678 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3679 permute statements for the SLP node NODE of the SLP instance
3680 SLP_NODE_INSTANCE. */
3682 bool
3683 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3684 gimple_stmt_iterator *gsi, poly_uint64 vf,
3685 slp_instance slp_node_instance, bool analyze_only,
3686 unsigned *n_perms)
3688 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3689 vec_info *vinfo = stmt_info->vinfo;
3690 tree mask_element_type = NULL_TREE, mask_type;
3691 int vec_index = 0;
3692 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3693 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3694 unsigned int mask_element;
3695 machine_mode mode;
3696 unsigned HOST_WIDE_INT nunits, const_vf;
3698 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3699 return false;
3701 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
3703 mode = TYPE_MODE (vectype);
3705 /* At the moment, all permutations are represented using per-element
3706 indices, so we can't cope with variable vector lengths or
3707 vectorization factors. */
3708 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3709 || !vf.is_constant (&const_vf))
3710 return false;
3712 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3713 same size as the vector element being permuted. */
3714 mask_element_type = lang_hooks.types.type_for_mode
3715 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3716 mask_type = get_vectype_for_scalar_type (mask_element_type);
  vec_perm_builder mask (nunits, nunits, 1);
  mask.quick_grow (nunits);
  vec_perm_indices indices;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
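  /* In the example above the load permutation of the a's node is {0,0,0}
     and DR_GROUP_SIZE is 3, so the flat indices computed below are
     0,0,0, 3,3,3, 6,6,6, 9,9,9; each index I is then split into an input
     vector number (I / NUNITS) and a lane within it (I % NUNITS).  */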
  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
	{
	  unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
			    + j * DR_GROUP_SIZE (stmt_info));
	  vec_index = i / nunits;
	  mask_element = i % nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
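	      /* Lanes taken from the second VEC_PERM_EXPR operand are
		 numbered NUNITS..2*NUNITS-1, hence the offset.  */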
	      mask_element += nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "permutation requires at "
				   "least three vectors ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt_info->stmt, 0);
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * nunits);
	  if (mask_element != index)
	    noop_p = false;
	  mask[index++] = mask_element;

	  if (index == nunits && !noop_p)
	    {
	      indices.new_vector (mask, 2, nunits);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (unsigned int i = 0; i < nunits; ++i)
			{
			  dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, " ");
			}
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  gcc_assert (analyze_only);
		  return false;
		}

	      ++*n_perms;
	    }

	  if (index == nunits)
	    {
	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;
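		  /* For a single-input permutation the first vector
		     serves as both operands of the VEC_PERM_EXPR.  */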
		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  stmt_vec_info perm_stmt_info;
		  if (! noop_p)
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      gassign *perm_stmt
			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
					       first_vec, second_vec,
					       mask_vec);
		      perm_stmt_info
			= vect_finish_stmt_generation (stmt_info, perm_stmt,
						       gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt_info = vinfo->lookup_def (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
		    = perm_stmt_info;
		}

	      index = 0;
	      first_vec_index = -1;
	      second_vec_index = -1;
	      noop_p = true;
	    }
	}
    }

  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return false;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
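      /* MASK records, for each scalar stmt of the group, whether it uses
	 CODE0 (0) or the alternate opcode (1).  */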
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
	{
	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
	  if (gimple_assign_rhs_code (ostmt) != code0)
	    {
	      mask.quick_push (1);
	      ocode = gimple_assign_rhs_code (ostmt);
	    }
	  else
	    mask.quick_push (0);
	}
      if (ocode != ERROR_MARK)
	{
	  vec<stmt_vec_info> v0;
	  vec<stmt_vec_info> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();
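	      /* Illustrative example (not from the original sources):
		 for a two-stmt {PLUS_EXPR, MINUS_EXPR} group with
		 const_nunits == 4, MASK is {0,1} and TMASK becomes
		 {0,5,2,7}, selecting v0[0],v1[1],v0[2],v1[3] -- even
		 lanes of the "plus" result blended with odd lanes of
		 the "minus" result, as a vector addsub would.  */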
	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]->stmt),
					   gimple_assign_lhs (v1[j]->stmt),
					   tmask);
	      SLP_TREE_VEC_STMTS (node).quick_push
		(vect_finish_stmt_generation (stmt_info, vstmt, &si));
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt_info, &si, &grouped_store, node,
				  instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }

  return is_store;
}
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
	continue;
      if (is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
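      /* Replace the call with a dummy "lhs = 0" assignment: the scalar
	 result is dead once the vector stmts are in place, and the stub
	 keeps the SSA web consistent until DCE cleans it up.  */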
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
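  /* The map lets vect_schedule_slp_instance reuse the vectorized stmts
     of a node whose scalar stmts were already scheduled as part of
     another node or instance.  */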
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
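	  /* Only grouped stores carry a data reference here; for other
	     roots (reductions, for example) there are no scalar stmts
	     to remove.  */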
	  if (!STMT_VINFO_DATA_REF (store_info))
	    break;

	  if (is_pattern_stmt_p (store_info))
	    store_info = STMT_VINFO_RELATED_STMT (store_info);
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store_info);
	  unlink_stmt_vdef (store_info);
	  gsi_remove (&gsi, true);
	  release_defs (store_info);
	  free_stmt_vec_info (store_info);
	}
    }

  return is_store;
}