gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */
static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
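/* Illustrative example (not part of the original sources): for a group
   of loads from a[0], a[1] and a[3], DR_GROUP_GAP of the a[1] element
   is 1 and that of the a[3] element is 2, so the place of a[3] in the
   chain is the running sum of gaps, 0 + 1 + 2 = 3.  */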
int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
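/* Worked example (illustrative, not from the original sources): with a
   128-bit vector and nelts == 4, the two selectors built below are
   sel1 = { 0, 4, 1, 5 } and sel2 = { 2, 6, 3, 7 }, i.e. the classic
   interleave-low/interleave-high pair on a two-vector input.  When the
   element size does not divide the vector size, elt_bytes is halved and
   two (then four, ...) intermediate vectors are used instead.  */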
bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
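/* Illustrative example (not part of the original sources): if the first
   group stmt is a_0 = x_0 + inv (operand 0 a vect_internal_def, operand 1
   a vect_external_def) while the current stmt is a_1 = inv + x_1, the
   def-type check against OPRNDS_INFO fails for operand 0; because
   PLUS_EXPR is commutative the "goto again" path below retries with the
   operands swapped and, if everything then matches, the swap is applied
   to the GIMPLE stmt at the end of the function.  */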
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: cannot swap operands of "
			     "shared stmt %G", stmt_info->stmt);
	  return -1;
	}

      if (first_op_cond)
	{
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	{
	  unsigned int op = commutative_op + first_op_idx;
	  swap_ssa_operands (stmt_info->stmt,
			     gimple_op_ptr (stmt_info->stmt, op),
			     gimple_op_ptr (stmt_info->stmt, op + 1));
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
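/* Concrete example (illustrative, not from the original sources): for
   the addsub pattern { PLUS, MINUS, PLUS, MINUS } with a V4SI vectype,
   ALT_STMT_CODE is MINUS_EXPR and the selector built below is
   { 0, 5, 2, 7 }: lanes doing the first operation pick elements from
   the first input vector, the MINUS lanes pick from the second.  */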
static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if ((gimple_call_internal_p (call_stmt)
	       && (!vectorizable_internal_fn_p
		   (gimple_call_internal_fn (call_stmt))))
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different shift "
				 "arguments in %G", stmt);
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G",
				     first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <stmt_vec_info> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return the built SLP tree; MATCHES indicates which stmts failed to
   match the first one.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (other_info)
		  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
		continue;
	      if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt_info = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
			     vect_location,
			     "Build SLP failed: SLP tree too large\n");
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child, false);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then
	     just throw that away and build this node up from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Do so only if the number of not successful permutes was not
	     more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (stmt_info))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					     vect_location,
					     "Build SLP failed: cannot swap "
					     "operands of shared stmt %G",
					     stmts[j]->stmt);
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
	      for (j = 0; j < group_size; ++j)
		if (matches[j] == !swap_not_matching)
		  {
		    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
		    /* Avoid swapping operands twice.  */
		    if (gimple_plf (stmt, GF_PLF_1))
		      continue;
		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
				       gimple_assign_rhs2_ptr (stmt));
		    gimple_set_plf (stmt, GF_PLF_1, true);
		  }
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
			      == (matches[j] == !swap_not_matching));

	      /* If we have all children of child built up from scalars then
		 just throw that away and build this node up from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see
		     the scalar version.  */
		  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
					grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (stmt_info) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  stmt_vec_info stmt_info;
  vec<stmt_vec_info> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    tmp_stmts[permutation[i]] = stmt_info;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
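/* Illustrative example (not part of the original sources): if every load
   node of the instance carries the same permutation { 1, 0, 3, 2 } and
   that permutation covers indices 0..group_size-1 without duplicates or
   gaps, the scalar stmts of all nodes can be reordered once so that the
   loads become contiguous and no runtime permutation is needed.  */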
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
	      && DR_GROUP_GAP (first_stmt_info) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (stmt_info)
      && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (stmt_info))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  stmt_vec_info next_load_info = NULL;
	  stmt_vec_info load_info;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      if (j != 0
		  && (next_load_info != load_info
		      || DR_GROUP_GAP (load_info) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
	      group_info = DR_GROUP_FIRST_ELEMENT (group_info);
	      unsigned HOST_WIDE_INT nunits;
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of DR_GROUP_SIZE.  */
	      tree vectype = STMT_VINFO_VECTYPE (group_info);
	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (stmt_info)));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last store in SLP NODE.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */
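/* Worked example (illustrative, not from the original sources): splitting
   a gap-free group of 7 stores with GROUP1_SIZE == 4 leaves the first
   group with DR_GROUP_SIZE 4 and the second with size 3; the second
   group's DR_GROUP_GAP becomes 0 + 4 (skipping over the first group) and
   the first group's gap grows by 3 so that unrolling skips the second.  */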
static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
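/* For example (illustrative, not from the original sources): with
   nunits == 4 and group_size == 6 the least common multiple is 12, so
   the unrolling factor is 12 / 6 = 2, i.e. two copies of the group are
   needed to fill a whole number of vectors.  */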
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   stmt_vec_info stmt_info, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int i;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  vec<stmt_vec_info> scalar_stmts;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (stmt_info);
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = REDUC_GROUP_SIZE (stmt_info);
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type %T\n",
			 scalar_type);

      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  stmt_vec_info next_info = stmt_info;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next_info); i++)
	scalar_stmts.safe_push (next_info);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  bst_fail = new scalar_stmts_set_t ();
  poly_uint64 max_nunits = nunits;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  delete bst_fail;
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node, false);
	      loads.release ();
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / const_max_nunits * const_max_nunits] = false;
	  vect_free_slp_tree (node, false);
	  loads.release ();
	}
      else
	{
	  /* Create a new SLP instance.  */
	  new_instance = XNEW (struct _slp_instance);
	  SLP_INSTANCE_TREE (new_instance) = node;
	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
	  SLP_INSTANCE_LOADS (new_instance) = loads;

	  /* Compute the load permutation.  */
	  slp_tree load_node;
	  bool loads_permuted = false;
	  FOR_EACH_VEC_ELT (loads, i, load_node)
	    {
	      vec<unsigned> load_permutation;
	      int j;
	      stmt_vec_info load_info;
	      bool this_load_permuted = false;
	      load_permutation.create (group_size);
	      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
		(SLP_TREE_SCALAR_STMTS (load_node)[0]);
	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j,
				load_info)
		{
		  int load_place = vect_get_place_in_interleaving_chain
		    (load_info, first_stmt_info);
		  gcc_assert (load_place != -1);
		  if (load_place != j)
		    this_load_permuted = true;
		  load_permutation.safe_push (load_place);
		}
	      if (!this_load_permuted
		  /* The load requires permutation when unrolling exposes
		     a gap either because the group is larger than the SLP
		     group-size or because there is a gap between the
		     groups.  */
		  && (known_eq (unrolling_factor, 1U)
		      || (group_size == DR_GROUP_SIZE (first_stmt_info)
			  && DR_GROUP_GAP (first_stmt_info) == 0)))
		{
		  load_permutation.release ();
1964 continue;
1966 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1967 loads_permuted = true;
1970 if (loads_permuted)
1972 if (!vect_supported_load_permutation_p (new_instance))
1974 if (dump_enabled_p ())
1975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1976 "Build SLP failed: unsupported load "
1977 "permutation %G", stmt_info->stmt);
1978 vect_free_slp_instance (new_instance, false);
1979 return false;
1983 /* If the loads and stores can be handled with load/store-lanes
1984 instructions, do not generate this SLP instance. */
1985 if (is_a <loop_vec_info> (vinfo)
1986 && loads_permuted
1987 && dr && vect_store_lanes_supported (vectype, group_size, false))
1989 slp_tree load_node;
1990 FOR_EACH_VEC_ELT (loads, i, load_node)
1992 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
1993 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
1994 /* Use SLP for strided accesses (or if we can't load-lanes). */
1995 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1996 || ! vect_load_lanes_supported
1997 (STMT_VINFO_VECTYPE (stmt_vinfo),
1998 DR_GROUP_SIZE (stmt_vinfo), false))
1999 break;
2001 if (i == loads.length ())
2003 if (dump_enabled_p ())
2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2005 "Built SLP cancelled: can use "
2006 "load/store-lanes\n");
2007 vect_free_slp_instance (new_instance, false);
2008 return false;
2012 vinfo->slp_instances.safe_push (new_instance);
2014 if (dump_enabled_p ())
2016 dump_printf_loc (MSG_NOTE, vect_location,
2017 "Final SLP tree for instance:\n");
2018 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2021 return true;
2024 else
2026 /* Failed to SLP. */
2027 /* Free the allocated memory. */
2028 scalar_stmts.release ();
2029 loads.release ();
2032 /* For basic block SLP, try to break the group up into multiples of the
2033 vector size. */
2034 unsigned HOST_WIDE_INT const_nunits;
2035 if (is_a <bb_vec_info> (vinfo)
2036 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2037 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2038 && nunits.is_constant (&const_nunits))
2040 /* We consider breaking the group only on VF boundaries from the existing
2041 start. */
2042 for (i = 0; i < group_size; i++)
2043 if (!matches[i]) break;
2045 if (i >= const_nunits && i < group_size)
2047 /* Split into two groups at the first vector boundary before i. */
2048 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2049 unsigned group1_size = i & ~(const_nunits - 1);
2051 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2052 group1_size);
2053 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2054 max_tree_size);
2055 /* If the first non-match was in the middle of a vector,
2056 skip the rest of that vector. */
2057 if (group1_size < i)
2059 i = group1_size + const_nunits;
2060 if (i < group_size)
2061 rest = vect_split_slp_store_group (rest, const_nunits);
2063 if (i < group_size)
2064 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2065 return res;
2067 /* Even though the first vector did not all match, we might be able to SLP
2068 (some of) the remainder. FORNOW ignore this possibility. */
2071 return false;
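/* A worked sketch of the splitting logic above: for a BB store group of
   size 7 with const_nunits == 4 whose first mismatch is at i == 5,
   group1_size == (5 & ~3) == 4 and the group is split 4 + 3.  The first
   half is re-analyzed; because the mismatch at 5 lies inside the second
   vector, i becomes 4 + 4 == 8 >= 7, so the 3-element remainder is not
   retried.  */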
2075 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2076 trees of packed scalar stmts if SLP is possible. */
2078 opt_result
2079 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2081 unsigned int i;
2082 stmt_vec_info first_element;
2084 DUMP_VECT_SCOPE ("vect_analyze_slp");
2086 /* Find SLP sequences starting from groups of grouped stores. */
2087 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2088 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2090 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2092 if (loop_vinfo->reduction_chains.length () > 0)
2094 /* Find SLP sequences starting from reduction chains. */
2095 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2096 if (! vect_analyze_slp_instance (vinfo, first_element,
2097 max_tree_size))
2099 /* Dissolve reduction chain group. */
2100 stmt_vec_info vinfo = first_element;
2101 while (vinfo)
2103 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2104 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2105 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2106 vinfo = next;
2108 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2112 /* Find SLP sequences starting from groups of reductions. */
2113 if (loop_vinfo->reductions.length () > 1)
2114 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2115 max_tree_size);
2118 return opt_result::success ();
2122 /* For each possible SLP instance decide whether to SLP it and calculate the
2123 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2124 to SLP at least one instance. */
2126 bool
2127 vect_make_slp_decision (loop_vec_info loop_vinfo)
2129 unsigned int i;
2130 poly_uint64 unrolling_factor = 1;
2131 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2132 slp_instance instance;
2133 int decided_to_slp = 0;
2135 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2137 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2139 /* FORNOW: SLP if you can. */
2140 /* All unroll factors have the form current_vector_size * X for some
2141 rational X, so they must have a common multiple. */
2142 unrolling_factor
2143 = force_common_multiple (unrolling_factor,
2144 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2146 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2147 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2148 loop-based vectorization. Such stmts will be marked as HYBRID. */
2149 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2150 decided_to_slp++;
2153 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2155 if (decided_to_slp && dump_enabled_p ())
2157 dump_printf_loc (MSG_NOTE, vect_location,
2158 "Decided to SLP %d instances. Unrolling factor ",
2159 decided_to_slp);
2160 dump_dec (MSG_NOTE, unrolling_factor);
2161 dump_printf (MSG_NOTE, "\n");
2164 return (decided_to_slp > 0);
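/* A sketch of the accumulation above: two instances with unrolling
   factors 2 and 3 force an overall loop unrolling factor of
   force_common_multiple (2, 3) == 6.  */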
2168 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2169 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2171 static void
2172 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2174 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2175 imm_use_iterator imm_iter;
2176 gimple *use_stmt;
2177 stmt_vec_info use_vinfo;
2178 slp_tree child;
2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2180 int j;
2182 /* Propagate hybrid down the SLP tree. */
2183 if (stype == hybrid)
2185 else if (HYBRID_SLP_STMT (stmt_vinfo))
2186 stype = hybrid;
2187 else
2189 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2190 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2191 /* If we get a pattern stmt here we have to use the LHS of the
2192 original stmt for immediate uses. */
2193 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2194 tree def;
2195 if (gimple_code (stmt) == GIMPLE_PHI)
2196 def = gimple_phi_result (stmt);
2197 else
2198 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2199 if (def)
2200 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2202 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2203 if (!use_vinfo)
2204 continue;
2205 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2206 if (!STMT_SLP_TYPE (use_vinfo)
2207 && (STMT_VINFO_RELEVANT (use_vinfo)
2208 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2209 && !(gimple_code (use_stmt) == GIMPLE_PHI
2210 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2212 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2214 "def in non-SLP stmt: %G", use_stmt);
2215 stype = hybrid;
2220 if (stype == hybrid
2221 && !HYBRID_SLP_STMT (stmt_vinfo))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2225 stmt_vinfo->stmt);
2226 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2229 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2230 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2231 vect_detect_hybrid_slp_stmts (child, i, stype);
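/* An illustrative sketch (not from the sources): if an SLP tree computes
   _1 = x_2 + y_3 to feed a group of vectorized stores, but _1 is also
   used by a relevant non-SLP statement such as sum_4 = sum_5 + _1, then
   _1's definition is needed by both the SLP and the loop vectorizer and
   is marked hybrid by the walk above.  */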
2234 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2236 static tree
2237 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2239 walk_stmt_info *wi = (walk_stmt_info *)data;
2240 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2242 if (wi->is_lhs)
2243 return NULL_TREE;
2245 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2246 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2248 if (dump_enabled_p ())
2249 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2250 def_stmt_info->stmt);
2251 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2254 return NULL_TREE;
2257 static tree
2258 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2259 walk_stmt_info *wi)
2261 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2262 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2263 /* If the stmt is in a SLP instance then this isn't a reason
2264 to mark use definitions in other SLP instances as hybrid. */
2265 if (! STMT_SLP_TYPE (use_vinfo)
2266 && (STMT_VINFO_RELEVANT (use_vinfo)
2267 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2268 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2269 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2271 else
2272 *handled = true;
2273 return NULL_TREE;
2276 /* Find stmts that must be both vectorized and SLPed. */
2278 void
2279 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2281 unsigned int i;
2282 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2283 slp_instance instance;
2285 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2287 /* First walk all pattern stmts in the loop and mark defs of uses as
2288 hybrid because immediate uses in them are not recorded. */
2289 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2291 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2292 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2293 gsi_next (&gsi))
2295 gimple *stmt = gsi_stmt (gsi);
2296 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2297 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2299 walk_stmt_info wi;
2300 memset (&wi, 0, sizeof (wi));
2301 wi.info = loop_vinfo;
2302 gimple_stmt_iterator gsi2
2303 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2304 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2305 vect_detect_hybrid_slp_1, &wi);
2306 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2307 vect_detect_hybrid_slp_2,
2308 vect_detect_hybrid_slp_1, &wi);
2313 /* Then walk the SLP instance trees marking stmts with uses in
2314 non-SLP stmts as hybrid, also propagating hybrid down the
2315 SLP tree, collecting the above info on-the-fly. */
2316 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2318 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2319 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2320 i, pure_slp);
2325 /* Initialize a bb_vec_info struct for the statements between
2326 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2328 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2329 gimple_stmt_iterator region_end_in,
2330 vec_info_shared *shared)
2331 : vec_info (vec_info::bb, init_cost (NULL), shared),
2332 bb (gsi_bb (region_begin_in)),
2333 region_begin (region_begin_in),
2334 region_end (region_end_in)
2336 gimple_stmt_iterator gsi;
2338 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2339 gsi_next (&gsi))
2341 gimple *stmt = gsi_stmt (gsi);
2342 gimple_set_uid (stmt, 0);
2343 add_stmt (stmt);
2346 bb->aux = this;
2350 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2351 stmts in the basic block. */
2353 _bb_vec_info::~_bb_vec_info ()
2355 for (gimple_stmt_iterator si = region_begin;
2356 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2357 /* Reset region marker. */
2358 gimple_set_uid (gsi_stmt (si), -1);
2360 bb->aux = NULL;
2363 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2364 given that child nodes have already been processed, and that
2365 their def types currently match their SLP node's def type. */
2367 static bool
2368 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2369 slp_instance node_instance,
2370 stmt_vector_for_cost *cost_vec)
2372 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2373 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2375 /* For BB vectorization vector types are assigned here.
2376 Memory accesses already got their vector type assigned
2377 in vect_analyze_data_refs. */
2378 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2379 if (bb_vinfo
2380 && ! STMT_VINFO_DATA_REF (stmt_info))
2382 tree vectype, nunits_vectype;
2383 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2384 &nunits_vectype))
2385 /* We checked this when building the node. */
2386 gcc_unreachable ();
2387 if (vectype == boolean_type_node)
2389 vectype = vect_get_mask_type_for_stmt (stmt_info);
2390 if (!vectype)
2391 /* vect_get_mask_type_for_stmt has already explained the
2392 failure. */
2393 return false;
2396 stmt_vec_info sstmt_info;
2397 unsigned int i;
2398 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2399 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2402 /* Calculate the number of vector statements to be created for the
2403 scalar stmts in this node. For SLP reductions it is equal to the
2404 number of vector statements in the children (which has already been
2405 calculated by the recursive call). Otherwise it is the number of
2406 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2407 VF divided by the number of elements in a vector. */
2408 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2409 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2410 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2411 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2412 else
2414 poly_uint64 vf;
2415 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2416 vf = loop_vinfo->vectorization_factor;
2417 else
2418 vf = 1;
2419 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2420 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2421 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2422 = vect_get_num_vectors (vf * group_size, vectype);
2425 bool dummy;
2426 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2429 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2430 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2432 Return true if the operations are supported. */
2434 static bool
2435 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2436 slp_instance node_instance,
2437 scalar_stmts_to_slp_tree_map_t *visited,
2438 scalar_stmts_to_slp_tree_map_t *lvisited,
2439 stmt_vector_for_cost *cost_vec)
2441 int i, j;
2442 slp_tree child;
2444 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2445 return true;
2447 /* If we already analyzed the exact same set of scalar stmts we're done.
2448 We share the generated vector stmts for those. */
2449 slp_tree *leader;
2450 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2451 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2453 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2454 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2455 return true;
2458 /* The SLP graph is acyclic, so not caching whether we failed or succeeded
2459 does not cause any issue since we throw away the lvisited set
2460 when we fail. */
2461 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2463 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2464 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2465 visited, lvisited, cost_vec))
2466 return false;
2468 /* Push SLP node def-type to stmt operands. */
2469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2470 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2471 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2472 = SLP_TREE_DEF_TYPE (child);
2473 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2474 cost_vec);
2475 /* Restore def-types. */
2476 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2477 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2478 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2479 = vect_internal_def;
2480 if (! res)
2481 return false;
2483 return true;
2487 /* Analyze statements in SLP instances of VINFO. Return true if the
2488 operations are supported. */
2490 bool
2491 vect_slp_analyze_operations (vec_info *vinfo)
2493 slp_instance instance;
2494 int i;
2496 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2498 scalar_stmts_to_slp_tree_map_t *visited
2499 = new scalar_stmts_to_slp_tree_map_t ();
2500 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2502 scalar_stmts_to_slp_tree_map_t lvisited;
2503 stmt_vector_for_cost cost_vec;
2504 cost_vec.create (2);
2505 if (!vect_slp_analyze_node_operations (vinfo,
2506 SLP_INSTANCE_TREE (instance),
2507 instance, visited, &lvisited,
2508 &cost_vec))
2510 slp_tree node = SLP_INSTANCE_TREE (instance);
2511 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2512 dump_printf_loc (MSG_NOTE, vect_location,
2513 "removing SLP instance operations starting from: %G",
2514 stmt_info->stmt);
2515 vect_free_slp_instance (instance, false);
2516 vinfo->slp_instances.ordered_remove (i);
2517 cost_vec.release ();
2519 else
2521 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2522 x != lvisited.end(); ++x)
2523 visited->put ((*x).first.copy (), (*x).second);
2524 i++;
2526 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2527 cost_vec.release ();
2530 delete visited;
2532 return !vinfo->slp_instances.is_empty ();
2536 /* Compute the scalar cost of the SLP node NODE and its children
2537 and record it in COST_VEC. Do not account defs that are marked in LIFE and
2538 update LIFE according to uses of NODE. */
2540 static void
2541 vect_bb_slp_scalar_cost (basic_block bb,
2542 slp_tree node, vec<bool, va_heap> *life,
2543 stmt_vector_for_cost *cost_vec)
2545 unsigned i;
2546 stmt_vec_info stmt_info;
2547 slp_tree child;
2549 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2551 gimple *stmt = stmt_info->stmt;
2552 vec_info *vinfo = stmt_info->vinfo;
2553 ssa_op_iter op_iter;
2554 def_operand_p def_p;
2556 if ((*life)[i])
2557 continue;
2559 /* If there is a non-vectorized use of the defs then the scalar
2560 stmt is kept live in which case we do not account it or any
2561 required defs in the SLP children in the scalar cost. This
2562 way we make the vectorization more costly when compared to
2563 the scalar cost. */
2564 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2566 imm_use_iterator use_iter;
2567 gimple *use_stmt;
2568 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2569 if (!is_gimple_debug (use_stmt))
2571 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2572 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2574 (*life)[i] = true;
2575 BREAK_FROM_IMM_USE_STMT (use_iter);
2579 if ((*life)[i])
2580 continue;
2582 /* Count scalar stmts only once. */
2583 if (gimple_visited_p (stmt))
2584 continue;
2585 gimple_set_visited (stmt, true);
2587 vect_cost_for_stmt kind;
2588 if (STMT_VINFO_DATA_REF (stmt_info))
2590 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2591 kind = scalar_load;
2592 else
2593 kind = scalar_store;
2595 else
2596 kind = scalar_stmt;
2597 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2600 auto_vec<bool, 20> subtree_life;
2601 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2603 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2605 /* Do not directly pass LIFE to the recursive call, copy it to
2606 confine changes in the callee to the current child/subtree. */
2607 subtree_life.safe_splice (*life);
2608 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2609 subtree_life.truncate (0);
2614 /* Check if vectorization of the basic block is profitable. */
2616 static bool
2617 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2619 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2620 slp_instance instance;
2621 int i;
2622 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2623 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2625 /* Calculate scalar cost. */
2626 stmt_vector_for_cost scalar_costs;
2627 scalar_costs.create (0);
2628 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2630 auto_vec<bool, 20> life;
2631 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2632 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2633 SLP_INSTANCE_TREE (instance),
2634 &life, &scalar_costs);
2636 void *target_cost_data = init_cost (NULL);
2637 add_stmt_costs (target_cost_data, &scalar_costs);
2638 scalar_costs.release ();
2639 unsigned dummy;
2640 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2641 destroy_cost_data (target_cost_data);
2643 /* Unset visited flag. */
2644 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2645 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2646 gimple_set_visited (gsi_stmt (gsi), false);
2648 /* Complete the target-specific cost calculation. */
2649 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2650 &vec_inside_cost, &vec_epilogue_cost);
2652 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2654 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2657 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2658 vec_inside_cost);
2659 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2660 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2661 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2664 /* Vectorization is profitable if its cost is no more than the cost of the
2665 scalar version. Note that we err on the vector side for equal cost because
2666 the cost estimate is otherwise quite pessimistic (constant uses are
2667 free on the scalar side but cost a load on the vector side for
2668 example). */
2669 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2670 return false;
2672 return true;
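/* A numeric sketch of the test above: with vec_inside_cost == 6,
   vec_outside_cost == 4 and scalar_cost == 10, the comparison
   6 + 4 > 10 is false, so the block is considered profitable;
   equal costs deliberately resolve in favour of the vector code.  */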
2675 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2676 if so and sets fatal to true if failure is independent of
2677 current_vector_size. */
2679 static bb_vec_info
2680 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2681 gimple_stmt_iterator region_end,
2682 vec<data_reference_p> datarefs, int n_stmts,
2683 bool &fatal, vec_info_shared *shared)
2685 bb_vec_info bb_vinfo;
2686 slp_instance instance;
2687 int i;
2688 poly_uint64 min_vf = 2;
2690 /* The first group of checks is independent of the vector size. */
2691 fatal = true;
2693 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2695 if (dump_enabled_p ())
2696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2697 "not vectorized: too many instructions in "
2698 "basic block.\n");
2699 free_data_refs (datarefs);
2700 return NULL;
2703 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2704 if (!bb_vinfo)
2705 return NULL;
2707 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2708 bb_vinfo->shared->save_datarefs ();
2710 /* Analyze the data references. */
2712 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2714 if (dump_enabled_p ())
2715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2716 "not vectorized: unhandled data-ref in basic "
2717 "block.\n");
2719 delete bb_vinfo;
2720 return NULL;
2723 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2725 if (dump_enabled_p ())
2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2727 "not vectorized: not enough data-refs in "
2728 "basic block.\n");
2730 delete bb_vinfo;
2731 return NULL;
2734 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2736 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 "not vectorized: unhandled data access in "
2739 "basic block.\n");
2741 delete bb_vinfo;
2742 return NULL;
2745 /* If there are no grouped stores in the region there is no need
2746 to continue with pattern recog as vect_analyze_slp will fail
2747 anyway. */
2748 if (bb_vinfo->grouped_stores.is_empty ())
2750 if (dump_enabled_p ())
2751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2752 "not vectorized: no grouped stores in "
2753 "basic block.\n");
2755 delete bb_vinfo;
2756 return NULL;
2759 /* The rest of the analysis below depends on the vector size in some way. */
2760 fatal = false;
2762 vect_pattern_recog (bb_vinfo);
2764 /* Check the SLP opportunities in the basic block, analyze and build SLP
2765 trees. */
2766 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2768 if (dump_enabled_p ())
2770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2771 "Failed to SLP the basic block.\n");
2772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2773 "not vectorized: failed to find SLP opportunities "
2774 "in basic block.\n");
2777 delete bb_vinfo;
2778 return NULL;
2781 vect_record_base_alignments (bb_vinfo);
2783 /* Analyze and verify the alignment of data references and the
2784 dependence in the SLP instances. */
2785 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2787 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2788 || ! vect_slp_analyze_instance_dependence (instance))
2790 slp_tree node = SLP_INSTANCE_TREE (instance);
2791 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2792 dump_printf_loc (MSG_NOTE, vect_location,
2793 "removing SLP instance operations starting from: %G",
2794 stmt_info->stmt);
2795 vect_free_slp_instance (instance, false);
2796 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2797 continue;
2800 /* Mark all the statements that we want to vectorize as pure SLP and
2801 relevant. */
2802 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2803 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2805 i++;
2807 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2809 delete bb_vinfo;
2810 return NULL;
2813 if (!vect_slp_analyze_operations (bb_vinfo))
2815 if (dump_enabled_p ())
2816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2817 "not vectorized: bad operation in basic block.\n");
2819 delete bb_vinfo;
2820 return NULL;
2823 /* Cost model: check if the vectorization is worthwhile. */
2824 if (!unlimited_cost_model (NULL)
2825 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2829 "not vectorized: vectorization is not "
2830 "profitable.\n");
2832 delete bb_vinfo;
2833 return NULL;
2836 if (dump_enabled_p ())
2837 dump_printf_loc (MSG_NOTE, vect_location,
2838 "Basic block will be vectorized using SLP\n");
2840 return bb_vinfo;
2844 /* Main entry for the BB vectorizer. Analyze and transform BB; return
2845 true if anything in the basic block was vectorized. */
2847 bool
2848 vect_slp_bb (basic_block bb)
2850 bb_vec_info bb_vinfo;
2851 gimple_stmt_iterator gsi;
2852 bool any_vectorized = false;
2853 auto_vector_sizes vector_sizes;
2855 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2857 /* Autodetect first vector size we try. */
2858 current_vector_size = 0;
2859 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2860 unsigned int next_size = 0;
2862 gsi = gsi_start_bb (bb);
2864 poly_uint64 autodetected_vector_size = 0;
2865 while (1)
2867 if (gsi_end_p (gsi))
2868 break;
2870 gimple_stmt_iterator region_begin = gsi;
2871 vec<data_reference_p> datarefs = vNULL;
2872 int insns = 0;
2874 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2876 gimple *stmt = gsi_stmt (gsi);
2877 if (is_gimple_debug (stmt))
2878 continue;
2879 insns++;
2881 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2882 vect_location = stmt;
2884 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
2885 break;
2888 /* Skip leading unhandled stmts. */
2889 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2891 gsi_next (&gsi);
2892 continue;
2895 gimple_stmt_iterator region_end = gsi;
2897 bool vectorized = false;
2898 bool fatal = false;
2899 vec_info_shared shared;
2900 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2901 datarefs, insns, fatal, &shared);
2902 if (bb_vinfo
2903 && dbg_cnt (vect_slp))
2905 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2908 bb_vinfo->shared->check_datarefs ();
2909 vect_schedule_slp (bb_vinfo);
2911 unsigned HOST_WIDE_INT bytes;
2912 if (current_vector_size.is_constant (&bytes))
2913 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2914 "basic block part vectorized using %wu byte "
2915 "vectors\n", bytes);
2916 else
2917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2918 "basic block part vectorized using variable "
2919 "length vectors\n");
2921 vectorized = true;
2923 delete bb_vinfo;
2925 any_vectorized |= vectorized;
2927 if (next_size == 0)
2928 autodetected_vector_size = current_vector_size;
2930 if (next_size < vector_sizes.length ()
2931 && known_eq (vector_sizes[next_size], autodetected_vector_size))
2932 next_size += 1;
2934 if (vectorized
2935 || next_size == vector_sizes.length ()
2936 || known_eq (current_vector_size, 0U)
2937 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2938 vector sizes will fail do not bother iterating. */
2939 || fatal)
2941 if (gsi_end_p (region_end))
2942 break;
2944 /* Skip the unhandled stmt. */
2945 gsi_next (&gsi);
2947 /* And reset vector sizes. */
2948 current_vector_size = 0;
2949 next_size = 0;
2951 else
2953 /* Try the next biggest vector size. */
2954 current_vector_size = vector_sizes[next_size++];
2955 if (dump_enabled_p ())
2957 dump_printf_loc (MSG_NOTE, vect_location,
2958 "***** Re-trying analysis with "
2959 "vector size ");
2960 dump_dec (MSG_NOTE, current_vector_size);
2961 dump_printf (MSG_NOTE, "\n");
2964 /* Start over. */
2965 gsi = region_begin;
2969 return any_vectorized;
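/* A sketch of the retry loop above, assuming a target advertising
   32-byte and 16-byte vectors: the region is first analyzed with the
   autodetected size; if that fails non-fatally, current_vector_size is
   set to the next untried size from vector_sizes and the same region is
   re-analyzed from region_begin before moving past the unhandled stmt.  */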
2973 /* Return true if the vector type of the boolean constant that is the OPNUM
2974 operand of statement STMT_VINFO is a boolean vector. */
2976 static bool
2977 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
2979 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
2980 tree op, vectype;
2981 enum vect_def_type dt;
2983 /* For comparisons and COND_EXPRs the type is chosen depending
2984 on the other comparison operand. */
2985 if (TREE_CODE_CLASS (code) == tcc_comparison)
2987 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
2988 if (opnum)
2989 op = gimple_assign_rhs1 (stmt);
2990 else
2991 op = gimple_assign_rhs2 (stmt);
2993 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
2994 gcc_unreachable ();
2996 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2999 if (code == COND_EXPR)
3001 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3002 tree cond = gimple_assign_rhs1 (stmt);
3004 if (TREE_CODE (cond) == SSA_NAME)
3005 op = cond;
3006 else if (opnum)
3007 op = TREE_OPERAND (cond, 1);
3008 else
3009 op = TREE_OPERAND (cond, 0);
3011 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3012 gcc_unreachable ();
3014 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3017 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
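/* An illustrative sketch: for _1 = x_2 < 3 the constant 3 is operand 1,
   so the code above inspects the vectype of x_2, the other comparison
   operand; the constant needs a boolean vector only if that vectype is
   itself a boolean vector type.  */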
3020 /* Build a variable-length vector in which the elements in ELTS are repeated
3021 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3022 RESULTS and add any new instructions to SEQ.
3024 The approach we use is:
3026 (1) Find a vector mode VM with integer elements of mode IM.
3028 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3029 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3030 from small vectors to IM.
3032 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3034 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3035 correct byte contents.
3037 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3039 We try to find the largest IM for which this sequence works, in order
3040 to cut down on the number of interleaves. */
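/* A hedged sketch of steps (1)-(5) for ELTS == {a, b, c, d}, assuming
   can_duplicate_and_interleave_p picks an IM twice the element width so
   that NVECTORS == 2:

     (2) s0 = VIEW_CONVERT <IM> ({a, b});  s1 = VIEW_CONVERT <IM> ({c, d});
     (3) p0 = {s0, s0, ...};  p1 = {s1, s1, ...};
     (4) lo = VEC_PERM_EXPR <p0, p1, lo_permute>;
         so the bytes of LO are a,b,c,d,a,b,c,d,...
     (5) result = VIEW_CONVERT <VECTOR_TYPE> (lo);  */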
3042 void
3043 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3044 unsigned int nresults, vec<tree> &results)
3046 unsigned int nelts = elts.length ();
3047 tree element_type = TREE_TYPE (vector_type);
3049 /* (1) Find a vector mode VM with integer elements of mode IM. */
3050 unsigned int nvectors = 1;
3051 tree new_vector_type;
3052 tree permutes[2];
3053 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3054 &nvectors, &new_vector_type,
3055 permutes))
3056 gcc_unreachable ();
3058 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3059 unsigned int partial_nelts = nelts / nvectors;
3060 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3062 tree_vector_builder partial_elts;
3063 auto_vec<tree, 32> pieces (nvectors * 2);
3064 pieces.quick_grow (nvectors * 2);
3065 for (unsigned int i = 0; i < nvectors; ++i)
3067 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3068 ELTS' has mode IM. */
3069 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3070 for (unsigned int j = 0; j < partial_nelts; ++j)
3071 partial_elts.quick_push (elts[i * partial_nelts + j]);
3072 tree t = gimple_build_vector (seq, &partial_elts);
3073 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3074 TREE_TYPE (new_vector_type), t);
3076 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3077 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3080 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3081 correct byte contents.
3083 We need to repeat the following operation log2(nvectors) times:
3085 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3086 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3088 However, if each input repeats every N elements and the VF is
3089 a multiple of N * 2, the HI result is the same as the LO. */
3090 unsigned int in_start = 0;
3091 unsigned int out_start = nvectors;
3092 unsigned int hi_start = nvectors / 2;
3093 /* A bound on the number of outputs needed to produce NRESULTS results
3094 in the final iteration. */
3095 unsigned int noutputs_bound = nvectors * nresults;
3096 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3098 noutputs_bound /= 2;
3099 unsigned int limit = MIN (noutputs_bound, nvectors);
3100 for (unsigned int i = 0; i < limit; ++i)
3102 if ((i & 1) != 0
3103 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3104 2 * in_repeat))
3106 pieces[out_start + i] = pieces[out_start + i - 1];
3107 continue;
3110 tree output = make_ssa_name (new_vector_type);
3111 tree input1 = pieces[in_start + (i / 2)];
3112 tree input2 = pieces[in_start + (i / 2) + hi_start];
3113 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3114 input1, input2,
3115 permutes[i & 1]);
3116 gimple_seq_add_stmt (seq, stmt);
3117 pieces[out_start + i] = output;
3119 std::swap (in_start, out_start);
3122 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3123 results.reserve (nresults);
3124 for (unsigned int i = 0; i < nresults; ++i)
3125 if (i < nvectors)
3126 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3127 pieces[in_start + i]));
3128 else
3129 results.quick_push (results[i - nvectors]);
3133 /* For constant and loop invariant defs of SLP_NODE this function returns
3134 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3135 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3136 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3140 static void
3141 vect_get_constant_vectors (tree op, slp_tree slp_node,
3142 vec<tree> *vec_oprnds,
3143 unsigned int op_num, unsigned int number_of_vectors)
3145 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3146 stmt_vec_info stmt_vinfo = stmts[0];
3147 gimple *stmt = stmt_vinfo->stmt;
3148 unsigned HOST_WIDE_INT nunits;
3149 tree vec_cst;
3150 unsigned j, number_of_places_left_in_vector;
3151 tree vector_type;
3152 tree vop;
3153 int group_size = stmts.length ();
3154 unsigned int vec_num, i;
3155 unsigned number_of_copies = 1;
3156 vec<tree> voprnds;
3157 voprnds.create (number_of_vectors);
3158 bool constant_p, is_store;
3159 tree neutral_op = NULL;
3160 enum tree_code code = gimple_expr_code (stmt);
3161 gimple_seq ctor_seq = NULL;
3162 auto_vec<tree, 16> permute_results;
3164 /* Check if vector type is a boolean vector. */
3165 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3166 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3167 vector_type
3168 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3169 else
3170 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3172 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3174 is_store = true;
3175 op = gimple_assign_rhs1 (stmt);
3177 else
3178 is_store = false;
3180 gcc_assert (op);
3182 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3183 created vectors. It is greater than 1 if unrolling is performed.
3185 For example, we have two scalar operands, s1 and s2 (e.g., group of
3186 strided accesses of size two), while NUNITS is four (i.e., four scalars
3187 of this type can be packed in a vector). The output vector will contain
3188 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3189 will be 2).
3191 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3192 containing the operands.
3194 For example, NUNITS is four as before, and the group size is 8
3195 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3196 {s5, s6, s7, s8}. */
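/* Assuming NUMBER_OF_VECTORS == 1 in the first example above and 2 in
   the second, the computation below gives NUMBER_OF_COPIES ==
   4 * 1 / 2 == 2 and NUMBER_OF_COPIES == 4 * 2 / 8 == 1 respectively.  */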
3198 /* When using duplicate_and_interleave, we just need one element for
3199 each scalar statement. */
3200 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3201 nunits = group_size;
3203 number_of_copies = nunits * number_of_vectors / group_size;
3205 number_of_places_left_in_vector = nunits;
3206 constant_p = true;
3207 tree_vector_builder elts (vector_type, nunits, 1);
3208 elts.quick_grow (nunits);
3209 bool place_after_defs = false;
3210 for (j = 0; j < number_of_copies; j++)
3212 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3214 stmt = stmt_vinfo->stmt;
3215 if (is_store)
3216 op = gimple_assign_rhs1 (stmt);
3217 else
3219 switch (code)
3221 case COND_EXPR:
3223 tree cond = gimple_assign_rhs1 (stmt);
3224 if (TREE_CODE (cond) == SSA_NAME)
3225 op = gimple_op (stmt, op_num + 1);
3226 else if (op_num == 0 || op_num == 1)
3227 op = TREE_OPERAND (cond, op_num);
3228 else
3230 if (op_num == 2)
3231 op = gimple_assign_rhs2 (stmt);
3232 else
3233 op = gimple_assign_rhs3 (stmt);
3236 break;
3238 case CALL_EXPR:
3239 op = gimple_call_arg (stmt, op_num);
3240 break;
3242 case LSHIFT_EXPR:
3243 case RSHIFT_EXPR:
3244 case LROTATE_EXPR:
3245 case RROTATE_EXPR:
3246 op = gimple_op (stmt, op_num + 1);
3247 /* Unlike the other binary operators, shifts/rotates have
3248 the shift count being int, instead of the same type as
3249 the lhs, so make sure the scalar is the right type if
3250 we are dealing with vectors of
3251 long long/long/short/char. */
3252 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3253 op = fold_convert (TREE_TYPE (vector_type), op);
3254 break;
3256 default:
3257 op = gimple_op (stmt, op_num + 1);
3258 break;
3262 /* Create 'vect_ = {op0,op1,...,opn}'. */
3263 number_of_places_left_in_vector--;
3264 tree orig_op = op;
3265 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3267 if (CONSTANT_CLASS_P (op))
3269 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3271 /* Can't use VIEW_CONVERT_EXPR for booleans because
3272 of possibly different sizes of scalar value and
3273 vector element. */
3274 if (integer_zerop (op))
3275 op = build_int_cst (TREE_TYPE (vector_type), 0);
3276 else if (integer_onep (op))
3277 op = build_all_ones_cst (TREE_TYPE (vector_type));
3278 else
3279 gcc_unreachable ();
3281 else
3282 op = fold_unary (VIEW_CONVERT_EXPR,
3283 TREE_TYPE (vector_type), op);
3284 gcc_assert (op && CONSTANT_CLASS_P (op));
3286 else
3288 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3289 gimple *init_stmt;
3290 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3292 tree true_val
3293 = build_all_ones_cst (TREE_TYPE (vector_type));
3294 tree false_val
3295 = build_zero_cst (TREE_TYPE (vector_type));
3296 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3297 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3298 op, true_val,
3299 false_val);
3301 else
3303 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3304 op);
3305 init_stmt
3306 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3307 op);
3309 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3310 op = new_temp;
3313 elts[number_of_places_left_in_vector] = op;
3314 if (!CONSTANT_CLASS_P (op))
3315 constant_p = false;
3316 if (TREE_CODE (orig_op) == SSA_NAME
3317 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3318 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3319 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3320 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3321 place_after_defs = true;
3323 if (number_of_places_left_in_vector == 0)
3325 if (constant_p
3326 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3327 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3328 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3329 else
3331 if (vec_oprnds->is_empty ())
3332 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3333 number_of_vectors,
3334 permute_results);
3335 vec_cst = permute_results[number_of_vectors - j - 1];
3337 tree init;
3338 gimple_stmt_iterator gsi;
3339 if (place_after_defs)
3341 stmt_vec_info last_stmt_info
3342 = vect_find_last_scalar_stmt_in_slp (slp_node);
3343 gsi = gsi_for_stmt (last_stmt_info->stmt);
3344 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3345 &gsi);
3347 else
3348 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3349 NULL);
3350 if (ctor_seq != NULL)
3352 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3353 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3354 ctor_seq = NULL;
3356 voprnds.quick_push (init);
3357 place_after_defs = false;
3358 number_of_places_left_in_vector = nunits;
3359 constant_p = true;
3360 elts.new_vector (vector_type, nunits, 1);
3361 elts.quick_grow (nunits);
3366 /* The vectors were created in reverse order; put them back in the
3367 original order. */
3368 vec_num = voprnds.length ();
3369 for (j = vec_num; j != 0; j--)
3371 vop = voprnds[j - 1];
3372 vec_oprnds->quick_push (vop);
3375 voprnds.release ();
3377 /* In case that VF is greater than the unrolling factor needed for the SLP
3378 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3379 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3380 to replicate the vectors. */
3381 while (number_of_vectors > vec_oprnds->length ())
3383 tree neutral_vec = NULL;
3385 if (neutral_op)
3387 if (!neutral_vec)
3388 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3390 vec_oprnds->quick_push (neutral_vec);
3392 else
3394 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3395 vec_oprnds->quick_push (vop);
3401 /* Get vectorized definitions from SLP_NODE that contains corresponding
3402 vectorized def-stmts. */
3404 static void
3405 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3407 tree vec_oprnd;
3408 stmt_vec_info vec_def_stmt_info;
3409 unsigned int i;
3411 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3413 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3415 gcc_assert (vec_def_stmt_info);
3416 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3417 vec_oprnd = gimple_phi_result (vec_def_phi);
3418 else
3419 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3420 vec_oprnds->quick_push (vec_oprnd);
3425 /* Get vectorized definitions for SLP_NODE.
3426 If the scalar definitions are loop invariants or constants, collect them and
3427 call vect_get_constant_vectors() to create vector stmts.
3428 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3429 must be stored in the corresponding child of SLP_NODE, and we call
3430 vect_get_slp_vect_defs () to retrieve them. */
3432 void
3433 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3434 vec<vec<tree> > *vec_oprnds)
3436 int number_of_vects = 0, i;
3437 unsigned int child_index = 0;
3438 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3439 slp_tree child = NULL;
3440 vec<tree> vec_defs;
3441 tree oprnd;
3442 bool vectorized_defs;
3444 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3445 FOR_EACH_VEC_ELT (ops, i, oprnd)
3447 /* For each operand we check if it has vectorized definitions in a child
3448 node or we need to create them (for invariants and constants). We
3449 check if the LHS of the first stmt of the next child matches OPRND.
3450 If it does, we found the correct child. Otherwise, we call
3451 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3452 to check this child node for the next operand. */
3453 vectorized_defs = false;
3454 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3456 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3458 /* We have to check both pattern and original def, if available. */
3459 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3461 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3462 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3463 tree first_def_op;
3465 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3466 first_def_op = gimple_phi_result (first_def);
3467 else
3468 first_def_op = gimple_get_lhs (first_def_info->stmt);
3469 if (operand_equal_p (oprnd, first_def_op, 0)
3470 || (related
3471 && operand_equal_p (oprnd,
3472 gimple_get_lhs (related->stmt), 0)))
3474 /* The number of vector defs is determined by the number of
3475 vector statements in the node from which we get those
3476 statements. */
3477 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3478 vectorized_defs = true;
3479 child_index++;
3482 else
3483 child_index++;
3486 if (!vectorized_defs)
3488 if (i == 0)
3490 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3491 /* Number of vector stmts was calculated according to LHS in
3492 vect_schedule_slp_instance (), fix it by replacing LHS with
3493 RHS, if necessary. See vect_get_smallest_scalar_type () for
3494 details. */
3495 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3496 &rhs_size_unit);
3497 if (rhs_size_unit != lhs_size_unit)
3499 number_of_vects *= rhs_size_unit;
3500 number_of_vects /= lhs_size_unit;
3505 /* Allocate memory for vectorized defs. */
3506 vec_defs = vNULL;
3507 vec_defs.create (number_of_vects);
3509 /* For reduction defs we call vect_get_constant_vectors (), since we are
3510 looking for initial loop invariant values. */
3511 if (vectorized_defs)
3512 /* The defs are already vectorized. */
3513 vect_get_slp_vect_defs (child, &vec_defs);
3514 else
3515 /* Build vectors from scalar defs. */
3516 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3517 number_of_vects);
3519 vec_oprnds->quick_push (vec_defs);
3523 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3524 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3525 permute statements for the SLP node NODE of the SLP instance
3526 SLP_NODE_INSTANCE. */
3528 bool
3529 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3530 gimple_stmt_iterator *gsi, poly_uint64 vf,
3531 slp_instance slp_node_instance, bool analyze_only,
3532 unsigned *n_perms)
3534 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3535 vec_info *vinfo = stmt_info->vinfo;
3536 int vec_index = 0;
3537 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3538 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3539 unsigned int mask_element;
3540 machine_mode mode;
3542 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3543 return false;
3545 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3547 mode = TYPE_MODE (vectype);
3548 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3550 /* Initialize the vect stmts of NODE to properly insert the generated
3551 stmts later. */
3552 if (! analyze_only)
3553 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3554 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3555 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3557 /* Generate permutation masks for every NODE. Number of masks for each NODE
3558 is equal to GROUP_SIZE.
3559 E.g., we have a group of three nodes with three loads from the same
3560 location in each node, and the vector size is 4. I.e., we have an
3561 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3562 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3563 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3566 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3567 The last mask is illegal since we assume two operands for the permute
3568 operation, and the mask element values can't be outside that range.
3569 Hence, the last mask must be converted into {2,5,5,5}.
3570 For the first two permutations we need the first and the second input
3571 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3572 we need the second and the third vectors: {b1,c1,a2,b2} and
3573 {c2,a3,b3,c3}. */
3575 int vect_stmts_counter = 0;
3576 unsigned int index = 0;
3577 int first_vec_index = -1;
3578 int second_vec_index = -1;
3579 bool noop_p = true;
3580 *n_perms = 0;
3582 vec_perm_builder mask;
3583 unsigned int nelts_to_build;
3584 unsigned int nvectors_per_build;
3585 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3586 && multiple_p (nunits, group_size));
3587 if (repeating_p)
3589 /* A single vector contains a whole number of copies of the node, so:
3590 (a) all permutes can use the same mask; and
3591 (b) the permutes only need a single vector input. */
3592 mask.new_vector (nunits, group_size, 3);
3593 nelts_to_build = mask.encoded_nelts ();
3594 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3596 else
3598 /* We need to construct a separate mask for each vector statement. */
3599 unsigned HOST_WIDE_INT const_nunits, const_vf;
3600 if (!nunits.is_constant (&const_nunits)
3601 || !vf.is_constant (&const_vf))
3602 return false;
3603 mask.new_vector (const_nunits, const_nunits, 1);
3604 nelts_to_build = const_vf * group_size;
3605 nvectors_per_build = 1;
3608 unsigned int count = mask.encoded_nelts ();
3609 mask.quick_grow (count);
3610 vec_perm_indices indices;
3612 for (unsigned int j = 0; j < nelts_to_build; j++)
3614 unsigned int iter_num = j / group_size;
3615 unsigned int stmt_num = j % group_size;
3616 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3617 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3618 if (repeating_p)
3620 first_vec_index = 0;
3621 mask_element = i;
3623 else
3625 /* Enforced before the loop when !repeating_p. */
3626 unsigned int const_nunits = nunits.to_constant ();
3627 vec_index = i / const_nunits;
3628 mask_element = i % const_nunits;
3629 if (vec_index == first_vec_index
3630 || first_vec_index == -1)
3632 first_vec_index = vec_index;
3634 else if (vec_index == second_vec_index
3635 || second_vec_index == -1)
3637 second_vec_index = vec_index;
3638 mask_element += const_nunits;
3640 else
3642 if (dump_enabled_p ())
3643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3644 "permutation requires at "
3645 "least three vectors %G",
3646 stmt_info->stmt);
3647 gcc_assert (analyze_only);
3648 return false;
3651 gcc_assert (mask_element < 2 * const_nunits);
3654 if (mask_element != index)
3655 noop_p = false;
3656 mask[index++] = mask_element;
3658 if (index == count && !noop_p)
3660 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3661 if (!can_vec_perm_const_p (mode, indices))
3663 if (dump_enabled_p ())
3665 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3666 vect_location,
3667 "unsupported vect permute { ");
3668 for (i = 0; i < count; ++i)
3670 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3671 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3673 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3675 gcc_assert (analyze_only);
3676 return false;
3679 ++*n_perms;
3682 if (index == count)
3684 if (!analyze_only)
3686 tree mask_vec = NULL_TREE;
3688 if (! noop_p)
3689 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3691 if (second_vec_index == -1)
3692 second_vec_index = first_vec_index;
3694 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3696 /* Generate the permute statement if necessary. */
3697 tree first_vec = dr_chain[first_vec_index + ri];
3698 tree second_vec = dr_chain[second_vec_index + ri];
3699 stmt_vec_info perm_stmt_info;
3700 if (! noop_p)
3701 {
3702 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3703 tree perm_dest
3704 = vect_create_destination_var (gimple_assign_lhs (stmt),
3705 vectype);
3706 perm_dest = make_ssa_name (perm_dest);
3707 gassign *perm_stmt
3708 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3709 first_vec, second_vec,
3710 mask_vec);
3711 perm_stmt_info
3712 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3713 gsi);
3714 }
3715 else
3716 /* If mask was NULL_TREE generate the requested
3717 identity transform. */
3718 perm_stmt_info = vinfo->lookup_def (first_vec);
3720 /* Store the vector statement in NODE. */
3721 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3722 = perm_stmt_info;
3723 }
3724 }
3726 index = 0;
3727 first_vec_index = -1;
3728 second_vec_index = -1;
3729 noop_p = true;
3730 }
3731 }
3733 return true;
3734 }
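What follows is an editorial sketch, not part of the GCC sources: a
self-contained restatement of the selector normalisation performed by
the loop above, assuming constant-width vectors.  The helper name and
signature are invented for illustration.  Each flattened load index is
split into a vector number and a lane; the first vector encountered
becomes input 0, the second becomes input 1 with its lanes biased by
NUNITS, and a third distinct vector fails, mirroring the "at least
three vectors" bailout.

static bool
build_two_input_sel (const unsigned *idx, unsigned n, unsigned nunits,
                     int *first_vec, int *second_vec, unsigned *sel)
{
  *first_vec = -1;
  *second_vec = -1;
  for (unsigned j = 0; j < n; ++j)
    {
      int vec = (int) (idx[j] / nunits);
      unsigned lane = idx[j] % nunits;
      if (*first_vec == -1 || vec == *first_vec)
        *first_vec = vec;
      else if (*second_vec == -1 || vec == *second_vec)
        {
          *second_vec = vec;
          /* Lanes of input 1 are addressed as nunits..2*nunits-1.  */
          lane += nunits;
        }
      else
        /* The permutation would need at least three input vectors.  */
        return false;
      sel[j] = lane;
    }
  return true;
}

Fed the indices {6,9,9,9} from the comment at the top of the function
with nunits == 4, this yields first_vec == 1, second_vec == 2 and
sel == {2,5,5,5}, exactly the conversion described there.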
3736 /* Vectorize SLP instance tree in postorder. */
3738 static void
3739 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3740 scalar_stmts_to_slp_tree_map_t *bst_map)
3741 {
3742 gimple_stmt_iterator si;
3743 stmt_vec_info stmt_info;
3744 unsigned int group_size;
3745 tree vectype;
3746 int i, j;
3747 slp_tree child;
3749 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3750 return;
3752 /* See if we have already vectorized the same set of stmts and reuse their
3753 vectorized stmts. */
3754 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3755 {
3756 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3757 return;
3758 }
3760 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3762 vect_schedule_slp_instance (child, instance, bst_map);
3764 /* Push SLP node def-type to stmts. */
3765 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3766 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3767 {
3768 stmt_vec_info child_stmt_info;
3769 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3770 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3771 }
3773 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3775 /* VECTYPE is the type of the destination. */
3776 vectype = STMT_VINFO_VECTYPE (stmt_info);
3777 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3778 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3780 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3781 if (!SLP_TREE_VEC_STMTS (node).exists ())
3782 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3784 if (dump_enabled_p ())
3785 dump_printf_loc (MSG_NOTE, vect_location,
3786 "------>vectorizing SLP node starting from: %G",
3787 stmt_info->stmt);
3789 /* Vectorized stmts go before the last scalar stmt which is where
3790 all uses are ready. */
3791 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3792 si = gsi_for_stmt (last_stmt_info->stmt);
3794 /* Mark the first element of the reduction chain as reduction to properly
3795 transform the node. In the analysis phase only the last element of the
3796 chain is marked as reduction. */
3797 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3798 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3799 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3800 {
3801 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3802 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3803 }
3805 /* Handle two-operation SLP nodes by vectorizing the group with
3806 both operations and then performing a merge. */
3807 if (SLP_TREE_TWO_OPERATORS (node))
3808 {
3809 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3810 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3811 enum tree_code ocode = ERROR_MARK;
3812 stmt_vec_info ostmt_info;
3813 vec_perm_builder mask (group_size, group_size, 1);
3814 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3815 {
3816 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3817 if (gimple_assign_rhs_code (ostmt) != code0)
3818 {
3819 mask.quick_push (1);
3820 ocode = gimple_assign_rhs_code (ostmt);
3821 }
3822 else
3823 mask.quick_push (0);
3824 }
3825 if (ocode != ERROR_MARK)
3826 {
3827 vec<stmt_vec_info> v0;
3828 vec<stmt_vec_info> v1;
3829 unsigned j;
3830 tree tmask = NULL_TREE;
3831 vect_transform_stmt (stmt_info, &si, node, instance);
3832 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3833 SLP_TREE_VEC_STMTS (node).truncate (0);
3834 gimple_assign_set_rhs_code (stmt, ocode);
3835 vect_transform_stmt (stmt_info, &si, node, instance);
3836 gimple_assign_set_rhs_code (stmt, code0);
3837 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3838 SLP_TREE_VEC_STMTS (node).truncate (0);
3839 tree meltype = build_nonstandard_integer_type
3840 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3841 tree mvectype = get_same_sized_vectype (meltype, vectype);
3842 unsigned k = 0, l;
3843 for (j = 0; j < v0.length (); ++j)
3844 {
3845 /* Enforced by vect_build_slp_tree, which rejects variable-length
3846 vectors for SLP_TREE_TWO_OPERATORS. */
3847 unsigned int const_nunits = nunits.to_constant ();
3848 tree_vector_builder melts (mvectype, const_nunits, 1);
3849 for (l = 0; l < const_nunits; ++l)
3850 {
3851 if (k >= group_size)
3852 k = 0;
3853 tree t = build_int_cst (meltype,
3854 mask[k++] * const_nunits + l);
3855 melts.quick_push (t);
3856 }
3857 tmask = melts.build ();
3859 /* ??? Not all targets support a VEC_PERM_EXPR with a
3860 constant mask that would translate to a vec_merge RTX
3861 (with their vec_perm_const_ok). We can either not
3862 vectorize in that case or let veclower do its job.
3863 Unfortunately that isn't too great and at least for
3864 plus/minus we'd eventually like to match targets'
3865 vector addsub instructions. */
3866 gimple *vstmt;
3867 vstmt = gimple_build_assign (make_ssa_name (vectype),
3868 VEC_PERM_EXPR,
3869 gimple_assign_lhs (v0[j]->stmt),
3870 gimple_assign_lhs (v1[j]->stmt),
3871 tmask);
3872 SLP_TREE_VEC_STMTS (node).quick_push
3873 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
3874 }
3875 v0.release ();
3876 v1.release ();
3877 return;
3878 }
3879 }
3880 vect_transform_stmt (stmt_info, &si, node, instance);
3882 /* Restore stmt def-types. */
3883 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3884 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3885 {
3886 stmt_vec_info child_stmt_info;
3887 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3888 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
3889 }
3890 }
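An editorial sketch, not part of the GCC sources: the selector built by
the two-operators code above, restated stand-alone for constant-width
vectors.  The helper name is invented; USE_OCODE holds the 0/1 flags
pushed into MASK above (1 where the scalar stmt used OCODE rather than
CODE0).

static void
build_two_operator_sel (const unsigned char *use_ocode, unsigned group_size,
                        unsigned nunits, unsigned *sel)
{
  unsigned k = 0;
  for (unsigned l = 0; l < nunits; ++l)
    {
      if (k >= group_size)
        k = 0;
      /* Lane l of the second vectorized statement is addressed as
         nunits + l in the VEC_PERM_EXPR selector.  */
      sel[l] = use_ocode[k++] * nunits + l;
    }
}

For an alternating plus/minus group (group_size == 2, use_ocode ==
{0, 1}) and nunits == 4 this produces {0, 5, 2, 7}: a lane-wise merge
of the two results, which is the addsub shape the ??? comment above
hopes targets could match directly.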
3892 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3893 For loop vectorization this is done in vectorizable_call, but for SLP
3894 it needs to be deferred until the end of vect_schedule_slp, because multiple
3895 SLP instances may refer to the same scalar stmt. */
3897 static void
3898 vect_remove_slp_scalar_calls (slp_tree node)
3899 {
3900 gimple *new_stmt;
3901 gimple_stmt_iterator gsi;
3902 int i;
3903 slp_tree child;
3904 tree lhs;
3905 stmt_vec_info stmt_info;
3907 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3908 return;
3910 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3911 vect_remove_slp_scalar_calls (child);
3913 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3914 {
3915 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3916 if (!stmt || gimple_bb (stmt) == NULL)
3917 continue;
3918 if (is_pattern_stmt_p (stmt_info)
3919 || !PURE_SLP_STMT (stmt_info))
3920 continue;
3921 lhs = gimple_call_lhs (stmt);
3922 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3923 gsi = gsi_for_stmt (stmt);
3924 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
3925 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3926 }
3927 }
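As an illustration of the rewrite above (a hedged example; the SSA
names are invented): a fully vectorized scalar call such as

  x_1 = __builtin_sin (a_2);

is replaced in place by

  x_1 = 0.0;

so the definition of x_1 stays available for any scalar uses that have
not been removed yet, while the now side-effect-free assignment becomes
trivially dead once every SLP instance referring to it has been
transformed.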
3929 /* Generate vector code for all SLP instances in the loop/basic block. */
3931 void
3932 vect_schedule_slp (vec_info *vinfo)
3933 {
3934 vec<slp_instance> slp_instances;
3935 slp_instance instance;
3936 unsigned int i;
3938 scalar_stmts_to_slp_tree_map_t *bst_map
3939 = new scalar_stmts_to_slp_tree_map_t ();
3940 slp_instances = vinfo->slp_instances;
3941 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3942 {
3943 /* Schedule the tree of INSTANCE. */
3944 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3945 instance, bst_map);
3946 if (dump_enabled_p ())
3947 dump_printf_loc (MSG_NOTE, vect_location,
3948 "vectorizing stmts using SLP.\n");
3950 delete bst_map;
3952 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3953 {
3954 slp_tree root = SLP_INSTANCE_TREE (instance);
3955 stmt_vec_info store_info;
3956 unsigned int j;
3958 /* Remove scalar call stmts. Do not do this for basic-block
3959 vectorization as not all uses may be vectorized.
3960 ??? Why should this be necessary? DCE should be able to
3961 remove the stmts itself.
3962 ??? For BB vectorization we can as well remove scalar
3963 stmts starting from the SLP tree root if they have no
3964 uses. */
3965 if (is_a <loop_vec_info> (vinfo))
3966 vect_remove_slp_scalar_calls (root);
3968 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
3969 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3970 {
3971 if (!STMT_VINFO_DATA_REF (store_info))
3972 break;
3974 store_info = vect_orig_stmt (store_info);
3975 /* Free the attached stmt_vec_info and remove the stmt. */
3976 vinfo->remove_stmt (store_info);