/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
          STMT_VINFO_NUM_SLP_USES (stmt_info)--;
        }
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
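      /* A COND_EXPR's embedded comparison contributes an extra operand:
         e.g. a < b ? x : y is handled as the four operands a, b, x, y.  */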
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
                                      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
        return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
        result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
        {
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
            {
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
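              /* SEL1 interleaves the low halves of the two inputs and SEL2
                 the high halves: e.g. for NELTS == 8 the encoded selectors
                 extend to { 0, 8, 1, 9, 2, 10, ... } and
                 { 4, 12, 5, 13, 6, 14, ... } respectively.  */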
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  A non-zero *SWAP
   on entry indicates that a swap is required for a cond_expr stmt:
   *SWAP is 1 if STMT is a cond and the operands of its comparison need to
   be swapped; *SWAP is 2 if STMT is a cond and its comparison code needs
   to be inverted.  If this function swaps any operands, *SWAP is set to a
   non-zero value on return.
   Return -1 on a fatal error, 1 if the error could be corrected by swapping
   operands of this node's parent, and 0 if everything is OK.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<stmt_vec_info> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          commutative_op = first_commutative_argument (ifn);
        }
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swapping can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code from the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
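          /* Row 1 reads the two comparison operands in swapped order and
             row 2 reads the two arms in swapped order; *SWAP selects the
             row.  */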
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
                                             first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt_info->stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: can't analyze def for %T\n",
                             oprnd);

          return -1;
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == commutative_op && !swapped)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !current_vector_size.is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (stmts.length (),
                                                      TYPE_MODE (type))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: invalid type of def "
                                 "for variable-length SLP %T\n", oprnd);
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt_info);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: illegal type of def %T\n",
                             oprnd);

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: cannot swap operands of "
                             "shared stmt %G", stmt_info->stmt);
          return -1;
        }

      if (first_op_cond)
        {
          gassign *stmt = as_a <gassign *> (stmt_info->stmt);
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        {
          unsigned int op = commutative_op + first_op_idx;
          swap_ssa_operands (stmt_info->stmt,
                             gimple_op_ptr (stmt_info->stmt, op),
                             gimple_op_ptr (stmt_info->stmt, op + 1));
        }
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "swapped operands to match def types in %G",
                         stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unsupported data-type in %G\n",
                         stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
                               unsigned int group_size, tree vectype,
                               tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }
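  /* E.g. for a { PLUS, MINUS } group and COUNT == 8 this builds the mask
     { 0, 9, 2, 11, 4, 13, 6, 15 }, selecting even elements from the vector
     of first-operation results and odd elements from the alternate one.  */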
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify whether the scalar stmts STMTS are isomorphic, do not require any
   data permutation, and are not of an unsupported operation type.  Return
   true if so; otherwise return false and indicate in *MATCHES which stmts
   are not isomorphic to the first one.  If MATCHES[0] is false then this
   indicates that the comparison could not be carried out or that the stmts
   will never be vectorized by SLP.

   Note a COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR that is isomorphic
   to the first stmt after swapping the two operands of its comparison; set
   SWAP[i] to 2 if stmt I is isomorphic to the first stmt after inverting
   the code of its comparison.  Taking A1 >= B1 ? X1 : Y1 as an example, it
   can be swapped to B1 <= A1 ? X1 : Y1 or inverted to A1 < B1 ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (unsigned char *swap,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype)
          || (nunits_vectype
              && !vect_record_max_nunits (stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if ((gimple_call_internal_p (call_stmt)
               && (!vectorizable_internal_fn_p
                   (gimple_call_internal_fn (call_stmt))))
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              if (vectype == boolean_type_node)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: shift of a"
                                     " boolean.\n");
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt %G", stmt);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt %G", first_stmt_info->stmt);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different shift "
                                 "arguments in %G", stmt);
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
                                       as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different calls in %G",
                                     stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: different "
                                         "interleaving chains in one node %G",
                                         stmt);
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: not grouped load %G", stmt);

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphism can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphism can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different"
                                     " operation %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
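  /* For instance a group alternating PLUS_EXPR and MINUS_EXPR stmts is
     vectorized as one vector addition, one vector subtraction and a blend
     of the two result vectors.  */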
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt %G", stmts[i]->stmt);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt %G", first_stmt_info->stmt);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits, vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When the SLP build fails for STMTS record this, otherwise the SLP
     build can be exponential in time when we allow constructing parts
     from scalars, see PR81723.  */
  if (! res)
    {
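      /* Record a copy of STMTS, since the vector passed in is owned
         (and later released) by the caller.  */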
      vec <stmt_vec_info> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree for the group of stmts STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require a
   data permutation, or are of an unsupported operation type.  On failure
   MATCHES indicates which stmts failed to match the first one.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (stmt_info != other_info)
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (REDUC_GROUP_FIRST_ELEMENT (other_info)
                  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
                continue;
              if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }
  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt_info = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child, false);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild, false);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (stmt_info))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt_info->stmt)
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
                  if (!stmt
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                  /* Verify if we can safely swap or if we committed to a
                     specific operand order already.
                     ??? Instead of modifying GIMPLE stmts here we could
                     record whether we want to swap operands in the SLP
                     node and temporarily do that when processing it
                     (or wrap operand accessors in a helper).  */
                  else if (swap[j] != 0
                           || STMT_VINFO_NUM_SLP_USES (stmt_info))
                    {
                      if (!swap_not_matching)
                        {
                          if (dump_enabled_p ())
                            dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                             vect_location,
                                             "Build SLP failed: cannot swap "
                                             "operands of shared stmt %G",
                                             stmts[j]->stmt);
                          goto fail;
                        }
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
              for (j = 0; j < group_size; ++j)
                if (matches[j] == !swap_not_matching)
                  {
                    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
                    /* Avoid swapping operands twice.  */
                    if (gimple_plf (stmt, GF_PLF_1))
                      continue;
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                    gimple_set_plf (stmt, GF_PLF_1, true);
                  }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
                              == (matches[j] == !swap_not_matching));

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild, false);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }
  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (stmt_info) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  stmt_vec_info stmt_info;
  vec<stmt_vec_info> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);
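  /* The stmt at old index I moves to new index PERMUTATION[I]: e.g. with
     PERMUTATION { 1, 2, 0 } the stmt at index 0 moves to index 1, index 1
     to index 2 and index 2 to index 0.  */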
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    tmp_stmts[permutation[i]] = stmt_info;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == DR_GROUP_SIZE (first_stmt_info)
              && DR_GROUP_GAP (first_stmt_info) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (stmt_info)
      && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (stmt_info))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
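          /* E.g. the node { b[2], b[3] } is a subchain of the interleaving
             group b[0] ... b[3] and needs no permutation.  */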
          bool subchain_p = true;
          stmt_vec_info next_load_info = NULL;
          stmt_vec_info load_info;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              if (j != 0
                  && (next_load_info != load_info
                      || DR_GROUP_GAP (load_info) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
              group_info = DR_GROUP_FIRST_ELEMENT (group_info);
              unsigned HOST_WIDE_INT nunits;
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of DR_GROUP_SIZE.  */
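              /* E.g. with DR_GROUP_SIZE == 7 and nunits == 4 only one full
                 vector is consumed, so permutation indexes above 3 would
                 read the unsupported tail of the group.  */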
              tree vectype = STMT_VINFO_VECTYPE (group_info);
              if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
                  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                             (STMT_VINFO_LOOP_VINFO (stmt_info)));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last (in program order) scalar stmt in SLP node NODE; for the
   root node of an SLP instance built from stores this is the last store.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */

static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT_INFO is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;
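  /* E.g. splitting a gapless group of 8 stores at GROUP1_SIZE == 6 yields
     a first group of size 6 with gap 2 and a second group of size 2 with
     gap 6.  */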
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
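  /* E.g. three stmts and 4-element vectors give common_multiple (4, 3) / 3
     == 4: the group is unrolled four times so that twelve scalar stmts
     fill three full vectors.  */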
  return exact_div (common_multiple (nunits, group_size), group_size);
}
1802 /* Analyze an SLP instance starting from a group of grouped stores. Call
1803 vect_build_slp_tree to build a tree of packed stmts if possible.
1804 Return FALSE if it's impossible to SLP any stmt in the loop. */
1806 static bool
1807 vect_analyze_slp_instance (vec_info *vinfo,
1808 stmt_vec_info stmt_info, unsigned max_tree_size)
1810 slp_instance new_instance;
1811 slp_tree node;
1812 unsigned int group_size;
1813 tree vectype, scalar_type = NULL_TREE;
1814 unsigned int i;
1815 vec<slp_tree> loads;
1816 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1817 vec<stmt_vec_info> scalar_stmts;
1819 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1821 scalar_type = TREE_TYPE (DR_REF (dr));
1822 vectype = get_vectype_for_scalar_type (scalar_type);
1823 group_size = DR_GROUP_SIZE (stmt_info);
1825 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1827 gcc_assert (is_a <loop_vec_info> (vinfo));
1828 vectype = STMT_VINFO_VECTYPE (stmt_info);
1829 group_size = REDUC_GROUP_SIZE (stmt_info);
1831 else
1833 gcc_assert (is_a <loop_vec_info> (vinfo));
1834 vectype = STMT_VINFO_VECTYPE (stmt_info);
1835 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1838 if (!vectype)
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1842 "Build SLP failed: unsupported data-type %T\n",
1843 scalar_type);
1845 return false;
1847 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1849 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1850 scalar_stmts.create (group_size);
1851 stmt_vec_info next_info = stmt_info;
1852 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1854 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1855 while (next_info)
1857 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1858 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
1861 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1863 /* Collect the reduction stmts and store them in
1864 SLP_TREE_SCALAR_STMTS. */
1865 while (next_info)
1867 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1868 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
1870 /* Mark the first element of the reduction chain as reduction to properly
1871 transform the node. In the reduction analysis phase only the last
1872 element of the chain is marked as reduction. */
1873 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
1875 else
1877 /* Collect reduction statements. */
1878 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1879 for (i = 0; reductions.iterate (i, &next_info); i++)
1880 scalar_stmts.safe_push (next_info);
1883 loads.create (group_size);
1885 /* Build the tree for the SLP instance. */
1886 bool *matches = XALLOCAVEC (bool, group_size);
1887 unsigned npermutes = 0;
1888 bst_fail = new scalar_stmts_set_t ();
1889 poly_uint64 max_nunits = nunits;
1890 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1891 &max_nunits, &loads, matches, &npermutes,
1892 NULL, max_tree_size);
1893 delete bst_fail;
1894 if (node != NULL)
1896 /* Calculate the unrolling factor based on the smallest type. */
1897 poly_uint64 unrolling_factor
1898 = calculate_unrolling_factor (max_nunits, group_size);
1900 if (maybe_ne (unrolling_factor, 1U)
1901 && is_a <bb_vec_info> (vinfo))
1903 unsigned HOST_WIDE_INT const_max_nunits;
1904 if (!max_nunits.is_constant (&const_max_nunits)
1905 || const_max_nunits > group_size)
1907 if (dump_enabled_p ())
1908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1909 "Build SLP failed: store group "
1910 "size not a multiple of the vector size "
1911 "in basic block SLP\n");
1912 vect_free_slp_tree (node, false);
1913 loads.release ();
1914 return false;
1916 /* Fatal mismatch. */
1917 matches[group_size / const_max_nunits * const_max_nunits] = false;
1918 vect_free_slp_tree (node, false);
1919 loads.release ();
1921 else
1923 /* Create a new SLP instance. */
1924 new_instance = XNEW (struct _slp_instance);
1925 SLP_INSTANCE_TREE (new_instance) = node;
1926 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1927 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1928 SLP_INSTANCE_LOADS (new_instance) = loads;
1930 /* Compute the load permutation. */
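/* As a hypothetical example: if a load node's scalar stmts sit at
   positions {2, 0, 3, 1} of their interleaving chain, the permutation
   recorded below is {2, 0, 3, 1} and LOADS_PERMUTED is set; a node
   whose loads already appear in chain order normally records none.  */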
1931 slp_tree load_node;
1932 bool loads_permuted = false;
1933 FOR_EACH_VEC_ELT (loads, i, load_node)
1935 vec<unsigned> load_permutation;
1936 int j;
1937 stmt_vec_info load_info;
1938 bool this_load_permuted = false;
1939 load_permutation.create (group_size);
1940 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
1941 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
1942 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
1944 int load_place = vect_get_place_in_interleaving_chain
1945 (load_info, first_stmt_info);
1946 gcc_assert (load_place != -1);
1947 if (load_place != j)
1948 this_load_permuted = true;
1949 load_permutation.safe_push (load_place);
1951 if (!this_load_permuted
1952 /* The load requires permutation when unrolling exposes
1953 a gap either because the group is larger than the SLP
1954 group-size or because there is a gap between the groups. */
1955 && (known_eq (unrolling_factor, 1U)
1956 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1957 && DR_GROUP_GAP (first_stmt_info) == 0)))
1959 load_permutation.release ();
1960 continue;
1962 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1963 loads_permuted = true;
1966 if (loads_permuted)
1968 if (!vect_supported_load_permutation_p (new_instance))
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "Build SLP failed: unsupported load "
1973 "permutation %G", stmt_info->stmt);
1974 vect_free_slp_instance (new_instance, false);
1975 return false;
1979 /* If the loads and stores can be handled with load/store-lanes
1980 instructions, do not generate this SLP instance. */
1981 if (is_a <loop_vec_info> (vinfo)
1982 && loads_permuted
1983 && dr && vect_store_lanes_supported (vectype, group_size, false))
1985 slp_tree load_node;
1986 FOR_EACH_VEC_ELT (loads, i, load_node)
1988 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
1989 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
1990 /* Use SLP for strided accesses (or if we can't load-lanes). */
1991 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1992 || ! vect_load_lanes_supported
1993 (STMT_VINFO_VECTYPE (stmt_vinfo),
1994 DR_GROUP_SIZE (stmt_vinfo), false))
1995 break;
1997 if (i == loads.length ())
1999 if (dump_enabled_p ())
2000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2001 "Built SLP cancelled: can use "
2002 "load/store-lanes\n");
2003 vect_free_slp_instance (new_instance, false);
2004 return false;
2008 vinfo->slp_instances.safe_push (new_instance);
2010 if (dump_enabled_p ())
2012 dump_printf_loc (MSG_NOTE, vect_location,
2013 "Final SLP tree for instance:\n");
2014 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2017 return true;
2020 else
2022 /* Failed to SLP. */
2023 /* Free the allocated memory. */
2024 scalar_stmts.release ();
2025 loads.release ();
2028 /* For basic block SLP, try to break the group up into multiples of the
2029 vector size. */
2030 unsigned HOST_WIDE_INT const_nunits;
2031 if (is_a <bb_vec_info> (vinfo)
2032 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2033 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2034 && nunits.is_constant (&const_nunits))
2036 /* We consider breaking the group only on VF boundaries from the existing
2037 start. */
2038 for (i = 0; i < group_size; i++)
2039 if (!matches[i]) break;
2041 if (i >= const_nunits && i < group_size)
2043 /* Split into two groups at the first vector boundary before i. */
2044 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2045 unsigned group1_size = i & ~(const_nunits - 1);
2047 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2048 group1_size);
2049 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2050 max_tree_size);
2051 /* If the first non-match was in the middle of a vector,
2052 skip the rest of that vector. */
2053 if (group1_size < i)
2055 i = group1_size + const_nunits;
2056 if (i < group_size)
2057 rest = vect_split_slp_store_group (rest, const_nunits);
2059 if (i < group_size)
2060 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2061 return res;
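/* A worked example of the splitting above, purely for illustration:
   with GROUP_SIZE 9, CONST_NUNITS 4 and the first mismatch at i == 6,
   GROUP1_SIZE is 6 & ~3 == 4.  {0..3} is split off and re-analyzed;
   since GROUP1_SIZE < i the mismatch was mid-vector, so i becomes
   4 + 4 == 8 and REST is split again: {4..7}, which contains the
   mismatch, is skipped and the remainder {8} is re-analyzed.  */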
2063 /* Even though the first vector did not all match, we might be able to SLP
2064 (some of) the remainder. FORNOW ignore this possibility. */
2067 return false;
2071 /* Check if there are stmts in the loop that can be vectorized using SLP.
2072 Build SLP trees of packed scalar stmts if SLP is possible. */
2074 opt_result
2075 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2077 unsigned int i;
2078 stmt_vec_info first_element;
2080 DUMP_VECT_SCOPE ("vect_analyze_slp");
2082 /* Find SLP sequences starting from groups of grouped stores. */
2083 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2084 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2086 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2088 if (loop_vinfo->reduction_chains.length () > 0)
2090 /* Find SLP sequences starting from reduction chains. */
2091 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2092 if (! vect_analyze_slp_instance (vinfo, first_element,
2093 max_tree_size))
2095 /* Dissolve reduction chain group. */
2096 stmt_vec_info vinfo = first_element;
2097 while (vinfo)
2099 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2100 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2101 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2102 vinfo = next;
2104 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2108 /* Find SLP sequences starting from groups of reductions. */
2109 if (loop_vinfo->reductions.length () > 1)
2110 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2111 max_tree_size);
2114 return opt_result::success ();
2118 /* For each possible SLP instance decide whether to SLP it and calculate the
2119 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2120 to SLP at least one instance. */
2122 bool
2123 vect_make_slp_decision (loop_vec_info loop_vinfo)
2125 unsigned int i;
2126 poly_uint64 unrolling_factor = 1;
2127 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2128 slp_instance instance;
2129 int decided_to_slp = 0;
2131 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2133 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2135 /* FORNOW: SLP if you can. */
2136 /* All unroll factors have the form current_vector_size * X for some
2137 rational X, so they must have a common multiple. */
2138 unrolling_factor
2139 = force_common_multiple (unrolling_factor,
2140 SLP_INSTANCE_UNROLLING_FACTOR (instance));
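/* For instance, unrolling factors of 2 and 3 from two instances
   combine to an overall factor of 6.  */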
2142 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2143 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2144 loop-based vectorization. Such stmts will be marked as HYBRID. */
2145 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2146 decided_to_slp++;
2149 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2151 if (decided_to_slp && dump_enabled_p ())
2153 dump_printf_loc (MSG_NOTE, vect_location,
2154 "Decided to SLP %d instances. Unrolling factor ",
2155 decided_to_slp);
2156 dump_dec (MSG_NOTE, unrolling_factor);
2157 dump_printf (MSG_NOTE, "\n");
2160 return (decided_to_slp > 0);
2164 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2165 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2167 static void
2168 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2170 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2171 imm_use_iterator imm_iter;
2172 gimple *use_stmt;
2173 stmt_vec_info use_vinfo;
2174 slp_tree child;
2175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2176 int j;
2178 /* Propagate hybrid down the SLP tree. */
2179 if (stype == hybrid)
2181 else if (HYBRID_SLP_STMT (stmt_vinfo))
2182 stype = hybrid;
2183 else
2185 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2186 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2187 /* If we get a pattern stmt here we have to use the LHS of the
2188 original stmt for immediate uses. */
2189 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2190 tree def;
2191 if (gimple_code (stmt) == GIMPLE_PHI)
2192 def = gimple_phi_result (stmt);
2193 else
2194 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2195 if (def)
2196 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2198 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2199 if (!use_vinfo)
2200 continue;
2201 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2202 if (!STMT_SLP_TYPE (use_vinfo)
2203 && (STMT_VINFO_RELEVANT (use_vinfo)
2204 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2205 && !(gimple_code (use_stmt) == GIMPLE_PHI
2206 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2208 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2210 "def in non-SLP stmt: %G", use_stmt);
2211 stype = hybrid;
2216 if (stype == hybrid
2217 && !HYBRID_SLP_STMT (stmt_vinfo))
2219 if (dump_enabled_p ())
2220 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2221 stmt_vinfo->stmt);
2222 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2225 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2226 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2227 vect_detect_hybrid_slp_stmts (child, i, stype);
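/* An illustrative scenario: if a pure-SLP multiplication also feeds a
   scalar (non-SLP) accumulation in the loop, the immediate-use walk
   above finds that non-SLP use, the stmt is marked hybrid, and the
   recursion below propagates hybrid to the SLP defs feeding it.  */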
2230 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2232 static tree
2233 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2235 walk_stmt_info *wi = (walk_stmt_info *)data;
2236 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2238 if (wi->is_lhs)
2239 return NULL_TREE;
2241 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2242 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2246 def_stmt_info->stmt);
2247 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2250 return NULL_TREE;
2253 static tree
2254 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2255 walk_stmt_info *wi)
2257 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2258 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2259 /* If the stmt is in an SLP instance then this isn't a reason
2260 to mark use definitions in other SLP instances as hybrid. */
2261 if (! STMT_SLP_TYPE (use_vinfo)
2262 && (STMT_VINFO_RELEVANT (use_vinfo)
2263 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2264 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2265 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2267 else
2268 *handled = true;
2269 return NULL_TREE;
2272 /* Find stmts that must be both vectorized and SLPed. */
2274 void
2275 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2277 unsigned int i;
2278 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2279 slp_instance instance;
2281 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2283 /* First walk all pattern stmts in the loop and mark the defs of their
2284 uses as hybrid, because immediate uses in pattern stmts are not recorded. */
2285 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2287 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2288 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2289 gsi_next (&gsi))
2291 gimple *stmt = gsi_stmt (gsi);
2292 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2293 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2295 walk_stmt_info wi;
2296 memset (&wi, 0, sizeof (wi));
2297 wi.info = loop_vinfo;
2298 gimple_stmt_iterator gsi2
2299 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2300 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2301 vect_detect_hybrid_slp_1, &wi);
2302 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2303 vect_detect_hybrid_slp_2,
2304 vect_detect_hybrid_slp_1, &wi);
2309 /* Then walk the SLP instance trees marking stmts with uses in
2310 non-SLP stmts as hybrid, also propagating hybrid down the
2311 SLP tree, collecting the above info on-the-fly. */
2312 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2314 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2315 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2316 i, pure_slp);
2321 /* Initialize a bb_vec_info struct for the statements between
2322 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2324 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2325 gimple_stmt_iterator region_end_in,
2326 vec_info_shared *shared)
2327 : vec_info (vec_info::bb, init_cost (NULL), shared),
2328 bb (gsi_bb (region_begin_in)),
2329 region_begin (region_begin_in),
2330 region_end (region_end_in)
2332 gimple_stmt_iterator gsi;
2334 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2335 gsi_next (&gsi))
2337 gimple *stmt = gsi_stmt (gsi);
2338 gimple_set_uid (stmt, 0);
2339 add_stmt (stmt);
2342 bb->aux = this;
2346 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2347 stmts in the basic block. */
2349 _bb_vec_info::~_bb_vec_info ()
2351 for (gimple_stmt_iterator si = region_begin;
2352 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2353 /* Reset region marker. */
2354 gimple_set_uid (gsi_stmt (si), -1);
2356 bb->aux = NULL;
2359 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2360 given that child nodes have already been processed, and that
2361 their def types currently match their SLP node's def type. */
2363 static bool
2364 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2365 slp_instance node_instance,
2366 stmt_vector_for_cost *cost_vec)
2368 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2369 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2371 /* For BB vectorization vector types are assigned here.
2372 Memory accesses already got their vector type assigned
2373 in vect_analyze_data_refs. */
2374 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2375 if (bb_vinfo
2376 && ! STMT_VINFO_DATA_REF (stmt_info))
2378 tree vectype, nunits_vectype;
2379 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2380 &nunits_vectype))
2381 /* We checked this when building the node. */
2382 gcc_unreachable ();
2383 if (vectype == boolean_type_node)
2385 vectype = vect_get_mask_type_for_stmt (stmt_info);
2386 if (!vectype)
2387 /* vect_get_mask_type_for_stmt has already explained the
2388 failure. */
2389 return false;
2392 stmt_vec_info sstmt_info;
2393 unsigned int i;
2394 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2395 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2398 /* Calculate the number of vector statements to be created for the
2399 scalar stmts in this node. For SLP reductions it is equal to the
2400 number of vector statements in the children (which has already been
2401 calculated by the recursive call). Otherwise it is the number of
2402 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2403 VF divided by the number of elements in a vector. */
2404 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2405 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2406 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2407 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2408 else
2410 poly_uint64 vf;
2411 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2412 vf = loop_vinfo->vectorization_factor;
2413 else
2414 vf = 1;
2415 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2416 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2417 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2418 = vect_get_num_vectors (vf * group_size, vectype);
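/* Putting assumed numbers to this: with VF 4, GROUP_SIZE 2 and a
   vector type holding 8 elements, vect_get_num_vectors (4 * 2, vectype)
   yields one vector stmt; GROUP_SIZE 4 would yield two.  */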
2421 bool dummy;
2422 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2425 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2426 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2428 Return true if the operations are supported. */
2430 static bool
2431 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2432 slp_instance node_instance,
2433 scalar_stmts_to_slp_tree_map_t *visited,
2434 scalar_stmts_to_slp_tree_map_t *lvisited,
2435 stmt_vector_for_cost *cost_vec)
2437 int i, j;
2438 slp_tree child;
2440 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2441 return true;
2443 /* If we already analyzed the exact same set of scalar stmts we're done.
2444 We share the generated vector stmts for those. */
2445 slp_tree *leader;
2446 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2447 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2449 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2450 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2451 return true;
2454 /* The SLP graph is acyclic, so it is safe not to cache whether we failed
2455 or succeeded: we throw away the lvisited set
2456 when we fail. */
2457 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2459 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2460 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2461 visited, lvisited, cost_vec))
2462 return false;
2464 /* Push SLP node def-type to stmt operands. */
2465 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2466 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2467 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2468 = SLP_TREE_DEF_TYPE (child);
2469 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2470 cost_vec);
2471 /* Restore def-types. */
2472 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2473 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2474 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2475 = vect_internal_def;
2476 if (! res)
2477 return false;
2479 return true;
2483 /* Analyze statements in SLP instances of VINFO. Return true if the
2484 operations are supported. */
2486 bool
2487 vect_slp_analyze_operations (vec_info *vinfo)
2489 slp_instance instance;
2490 int i;
2492 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2494 scalar_stmts_to_slp_tree_map_t *visited
2495 = new scalar_stmts_to_slp_tree_map_t ();
2496 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2498 scalar_stmts_to_slp_tree_map_t lvisited;
2499 stmt_vector_for_cost cost_vec;
2500 cost_vec.create (2);
2501 if (!vect_slp_analyze_node_operations (vinfo,
2502 SLP_INSTANCE_TREE (instance),
2503 instance, visited, &lvisited,
2504 &cost_vec))
2506 slp_tree node = SLP_INSTANCE_TREE (instance);
2507 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2508 dump_printf_loc (MSG_NOTE, vect_location,
2509 "removing SLP instance operations starting from: %G",
2510 stmt_info->stmt);
2511 vect_free_slp_instance (instance, false);
2512 vinfo->slp_instances.ordered_remove (i);
2513 cost_vec.release ();
2515 else
2517 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2518 x != lvisited.end(); ++x)
2519 visited->put ((*x).first.copy (), (*x).second);
2520 i++;
2522 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2523 cost_vec.release ();
2526 delete visited;
2528 return !vinfo->slp_instances.is_empty ();
2532 /* Compute the scalar cost of the SLP node NODE and its children
2533 and record it in COST_VEC. Do not account defs that are marked in LIFE,
2534 and update LIFE according to uses of NODE. */
2536 static void
2537 vect_bb_slp_scalar_cost (basic_block bb,
2538 slp_tree node, vec<bool, va_heap> *life,
2539 stmt_vector_for_cost *cost_vec)
2541 unsigned i;
2542 stmt_vec_info stmt_info;
2543 slp_tree child;
2545 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2547 gimple *stmt = stmt_info->stmt;
2548 vec_info *vinfo = stmt_info->vinfo;
2549 ssa_op_iter op_iter;
2550 def_operand_p def_p;
2552 if ((*life)[i])
2553 continue;
2555 /* If there is a non-vectorized use of the defs then the scalar
2556 stmt is kept live in which case we do not account it or any
2557 required defs in the SLP children in the scalar cost. This
2558 way we make the vectorization more costly when compared to
2559 the scalar cost. */
2560 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2562 imm_use_iterator use_iter;
2563 gimple *use_stmt;
2564 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2565 if (!is_gimple_debug (use_stmt))
2567 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2568 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2570 (*life)[i] = true;
2571 BREAK_FROM_IMM_USE_STMT (use_iter);
2575 if ((*life)[i])
2576 continue;
2578 /* Count scalar stmts only once. */
2579 if (gimple_visited_p (stmt))
2580 continue;
2581 gimple_set_visited (stmt, true);
2583 vect_cost_for_stmt kind;
2584 if (STMT_VINFO_DATA_REF (stmt_info))
2586 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2587 kind = scalar_load;
2588 else
2589 kind = scalar_store;
2591 else
2592 kind = scalar_stmt;
2593 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2596 auto_vec<bool, 20> subtree_life;
2597 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2599 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2601 /* Do not directly pass LIFE to the recursive call, copy it to
2602 confine changes in the callee to the current child/subtree. */
2603 subtree_life.safe_splice (*life);
2604 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2605 subtree_life.truncate (0);
2610 /* Check if vectorization of the basic block is profitable. */
2612 static bool
2613 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2615 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2616 slp_instance instance;
2617 int i;
2618 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2619 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2621 /* Calculate scalar cost. */
2622 stmt_vector_for_cost scalar_costs;
2623 scalar_costs.create (0);
2624 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2626 auto_vec<bool, 20> life;
2627 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2628 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2629 SLP_INSTANCE_TREE (instance),
2630 &life, &scalar_costs);
2632 void *target_cost_data = init_cost (NULL);
2633 add_stmt_costs (target_cost_data, &scalar_costs);
2634 scalar_costs.release ();
2635 unsigned dummy;
2636 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2637 destroy_cost_data (target_cost_data);
2639 /* Unset visited flag. */
2640 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2641 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2642 gimple_set_visited (gsi_stmt (gsi), false);
2644 /* Complete the target-specific cost calculation. */
2645 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2646 &vec_inside_cost, &vec_epilogue_cost);
2648 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2650 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2653 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2654 vec_inside_cost);
2655 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2656 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2657 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2660 /* Vectorization is profitable if its cost is less than the cost of the
2661 scalar version. Note that we err on the vector side for equal cost because
2662 the cost estimate is otherwise quite pessimistic (constant uses are
2663 free on the scalar side but cost a load on the vector side for
2664 example). */
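/* For instance, a vector inside cost of 8 with prologue cost 2 and
   epilogue cost 0 against a scalar cost of 10 gives 2 + 8 > 10 ==
   false, so the (equal-cost) vectorization is considered profitable.  */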
2665 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2666 return false;
2668 return true;
2671 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2672 if so and sets fatal to true if failure is independent of
2673 current_vector_size. */
2675 static bb_vec_info
2676 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2677 gimple_stmt_iterator region_end,
2678 vec<data_reference_p> datarefs, int n_stmts,
2679 bool &fatal, vec_info_shared *shared)
2681 bb_vec_info bb_vinfo;
2682 slp_instance instance;
2683 int i;
2684 poly_uint64 min_vf = 2;
2686 /* The first group of checks is independent of the vector size. */
2687 fatal = true;
2689 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2693 "not vectorized: too many instructions in "
2694 "basic block.\n");
2695 free_data_refs (datarefs);
2696 return NULL;
2699 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2700 if (!bb_vinfo)
2701 return NULL;
2703 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2704 bb_vinfo->shared->save_datarefs ();
2706 /* Analyze the data references. */
2708 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2712 "not vectorized: unhandled data-ref in basic "
2713 "block.\n");
2715 delete bb_vinfo;
2716 return NULL;
2719 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "not vectorized: not enough data-refs in "
2724 "basic block.\n");
2726 delete bb_vinfo;
2727 return NULL;
2730 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2732 if (dump_enabled_p ())
2733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2734 "not vectorized: unhandled data access in "
2735 "basic block.\n");
2737 delete bb_vinfo;
2738 return NULL;
2741 /* If there are no grouped stores in the region there is no need
2742 to continue with pattern recog as vect_analyze_slp will fail
2743 anyway. */
2744 if (bb_vinfo->grouped_stores.is_empty ())
2746 if (dump_enabled_p ())
2747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2748 "not vectorized: no grouped stores in "
2749 "basic block.\n");
2751 delete bb_vinfo;
2752 return NULL;
2755 /* The rest of the analysis below depends on the vector size in some way. */
2756 fatal = false;
2758 vect_pattern_recog (bb_vinfo);
2760 /* Check the SLP opportunities in the basic block, analyze and build SLP
2761 trees. */
2762 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2764 if (dump_enabled_p ())
2766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2767 "Failed to SLP the basic block.\n");
2768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2769 "not vectorized: failed to find SLP opportunities "
2770 "in basic block.\n");
2773 delete bb_vinfo;
2774 return NULL;
2777 vect_record_base_alignments (bb_vinfo);
2779 /* Analyze and verify the alignment of data references and the
2780 dependence in the SLP instances. */
2781 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2783 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2784 || ! vect_slp_analyze_instance_dependence (instance))
2786 slp_tree node = SLP_INSTANCE_TREE (instance);
2787 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2788 dump_printf_loc (MSG_NOTE, vect_location,
2789 "removing SLP instance operations starting from: %G",
2790 stmt_info->stmt);
2791 vect_free_slp_instance (instance, false);
2792 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2793 continue;
2796 /* Mark all the statements that we want to vectorize as pure SLP and
2797 relevant. */
2798 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2799 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2801 i++;
2803 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2805 delete bb_vinfo;
2806 return NULL;
2809 if (!vect_slp_analyze_operations (bb_vinfo))
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2813 "not vectorized: bad operation in basic block.\n");
2815 delete bb_vinfo;
2816 return NULL;
2819 /* Cost model: check if the vectorization is worthwhile. */
2820 if (!unlimited_cost_model (NULL)
2821 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2825 "not vectorized: vectorization is not "
2826 "profitable.\n");
2828 delete bb_vinfo;
2829 return NULL;
2832 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_NOTE, vect_location,
2834 "Basic block will be vectorized using SLP\n");
2836 return bb_vinfo;
2840 /* Main entry for the BB vectorizer. Analyze and transform BB; return
2841 true if anything in the basic block was vectorized. */
2843 bool
2844 vect_slp_bb (basic_block bb)
2846 bb_vec_info bb_vinfo;
2847 gimple_stmt_iterator gsi;
2848 bool any_vectorized = false;
2849 auto_vector_sizes vector_sizes;
2851 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2853 /* Autodetect the first vector size we try. */
2854 current_vector_size = 0;
2855 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2856 unsigned int next_size = 0;
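/* Illustrative iteration order, assuming a target advertising sizes
   {16, 8, 4} with 16 autodetected: a region that fails analysis is
   re-tried with size 8 and then 4 before we give up, unless the
   failure was flagged as fatal (independent of the vector size).  */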
2858 gsi = gsi_start_bb (bb);
2860 poly_uint64 autodetected_vector_size = 0;
2861 while (1)
2863 if (gsi_end_p (gsi))
2864 break;
2866 gimple_stmt_iterator region_begin = gsi;
2867 vec<data_reference_p> datarefs = vNULL;
2868 int insns = 0;
2870 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2872 gimple *stmt = gsi_stmt (gsi);
2873 if (is_gimple_debug (stmt))
2874 continue;
2875 insns++;
2877 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2878 vect_location = stmt;
2880 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
2881 break;
2884 /* Skip leading unhandled stmts. */
2885 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2887 gsi_next (&gsi);
2888 continue;
2891 gimple_stmt_iterator region_end = gsi;
2893 bool vectorized = false;
2894 bool fatal = false;
2895 vec_info_shared shared;
2896 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2897 datarefs, insns, fatal, &shared);
2898 if (bb_vinfo
2899 && dbg_cnt (vect_slp))
2901 if (dump_enabled_p ())
2902 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2904 bb_vinfo->shared->check_datarefs ();
2905 vect_schedule_slp (bb_vinfo);
2907 unsigned HOST_WIDE_INT bytes;
2908 if (current_vector_size.is_constant (&bytes))
2909 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2910 "basic block part vectorized using %wu byte "
2911 "vectors\n", bytes);
2912 else
2913 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2914 "basic block part vectorized using variable "
2915 "length vectors\n");
2917 vectorized = true;
2919 delete bb_vinfo;
2921 any_vectorized |= vectorized;
2923 if (next_size == 0)
2924 autodetected_vector_size = current_vector_size;
2926 if (next_size < vector_sizes.length ()
2927 && known_eq (vector_sizes[next_size], autodetected_vector_size))
2928 next_size += 1;
2930 if (vectorized
2931 || next_size == vector_sizes.length ()
2932 || known_eq (current_vector_size, 0U)
2933 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2934 vector sizes will fail, do not bother iterating. */
2935 || fatal)
2937 if (gsi_end_p (region_end))
2938 break;
2940 /* Skip the unhandled stmt. */
2941 gsi_next (&gsi);
2943 /* And reset vector sizes. */
2944 current_vector_size = 0;
2945 next_size = 0;
2947 else
2949 /* Try the next biggest vector size. */
2950 current_vector_size = vector_sizes[next_size++];
2951 if (dump_enabled_p ())
2953 dump_printf_loc (MSG_NOTE, vect_location,
2954 "***** Re-trying analysis with "
2955 "vector size ");
2956 dump_dec (MSG_NOTE, current_vector_size);
2957 dump_printf (MSG_NOTE, "\n");
2960 /* Start over. */
2961 gsi = region_begin;
2965 return any_vectorized;
2969 /* Return true if the vector type of the boolean constant that is operand
2970 OPNUM of statement STMT_VINFO is a boolean vector. */
2972 static bool
2973 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
2975 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
2976 tree op, vectype;
2977 enum vect_def_type dt;
2979 /* For comparisons and COND_EXPRs the type is chosen depending
2980 on the other comparison operand. */
2981 if (TREE_CODE_CLASS (code) == tcc_comparison)
2983 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
2984 if (opnum)
2985 op = gimple_assign_rhs1 (stmt);
2986 else
2987 op = gimple_assign_rhs2 (stmt);
2989 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
2990 gcc_unreachable ();
2992 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2995 if (code == COND_EXPR)
2997 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
2998 tree cond = gimple_assign_rhs1 (stmt);
3000 if (TREE_CODE (cond) == SSA_NAME)
3001 op = cond;
3002 else if (opnum)
3003 op = TREE_OPERAND (cond, 1);
3004 else
3005 op = TREE_OPERAND (cond, 0);
3007 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3008 gcc_unreachable ();
3010 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3013 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3016 /* Build a variable-length vector in which the elements in ELTS are repeated
3017 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3018 RESULTS and add any new instructions to SEQ.
3020 The approach we use is:
3022 (1) Find a vector mode VM with integer elements of mode IM.
3024 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3025 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3026 from small vectors to IM.
3028 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3030 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3031 correct byte contents.
3033 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3035 We try to find the largest IM for which this sequence works, in order
3036 to cut down on the number of interleaves. */
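/* A sketch of the steps under assumed parameters: for ELTS {a, b, c, d}
   of 32-bit elements with NVECTORS == 2 and IM == DI, step (2) forms
   the two DI values ab and cd, step (3) duplicates them into
   {ab, ab, ...} and {cd, cd, ...}, step (4) interleaves those into
   {ab, cd, ab, cd, ...}, and step (5) view-converts the result back,
   giving {a, b, c, d, a, b, c, d, ...}.  */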
3038 void
3039 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3040 unsigned int nresults, vec<tree> &results)
3042 unsigned int nelts = elts.length ();
3043 tree element_type = TREE_TYPE (vector_type);
3045 /* (1) Find a vector mode VM with integer elements of mode IM. */
3046 unsigned int nvectors = 1;
3047 tree new_vector_type;
3048 tree permutes[2];
3049 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3050 &nvectors, &new_vector_type,
3051 permutes))
3052 gcc_unreachable ();
3054 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3055 unsigned int partial_nelts = nelts / nvectors;
3056 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3058 tree_vector_builder partial_elts;
3059 auto_vec<tree, 32> pieces (nvectors * 2);
3060 pieces.quick_grow (nvectors * 2);
3061 for (unsigned int i = 0; i < nvectors; ++i)
3063 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3064 ELTS' has mode IM. */
3065 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3066 for (unsigned int j = 0; j < partial_nelts; ++j)
3067 partial_elts.quick_push (elts[i * partial_nelts + j]);
3068 tree t = gimple_build_vector (seq, &partial_elts);
3069 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3070 TREE_TYPE (new_vector_type), t);
3072 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3073 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3076 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3077 correct byte contents.
3079 We need to repeat the following operation log2(nvectors) times:
3081 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3082 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3084 However, if each input repeats every N elements and the VF is
3085 a multiple of N * 2, the HI result is the same as the LO. */
3086 unsigned int in_start = 0;
3087 unsigned int out_start = nvectors;
3088 unsigned int hi_start = nvectors / 2;
3089 /* A bound on the number of outputs needed to produce NRESULTS results
3090 in the final iteration. */
3091 unsigned int noutputs_bound = nvectors * nresults;
3092 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3094 noutputs_bound /= 2;
3095 unsigned int limit = MIN (noutputs_bound, nvectors);
3096 for (unsigned int i = 0; i < limit; ++i)
3098 if ((i & 1) != 0
3099 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3100 2 * in_repeat))
3102 pieces[out_start + i] = pieces[out_start + i - 1];
3103 continue;
3106 tree output = make_ssa_name (new_vector_type);
3107 tree input1 = pieces[in_start + (i / 2)];
3108 tree input2 = pieces[in_start + (i / 2) + hi_start];
3109 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3110 input1, input2,
3111 permutes[i & 1]);
3112 gimple_seq_add_stmt (seq, stmt);
3113 pieces[out_start + i] = output;
3115 std::swap (in_start, out_start);
3118 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3119 results.reserve (nresults);
3120 for (unsigned int i = 0; i < nresults; ++i)
3121 if (i < nvectors)
3122 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3123 pieces[in_start + i]));
3124 else
3125 results.quick_push (results[i - nvectors]);
3129 /* For constant and loop invariant defs of SLP_NODE this function returns
3130 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3131 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3132 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3136 static void
3137 vect_get_constant_vectors (tree op, slp_tree slp_node,
3138 vec<tree> *vec_oprnds,
3139 unsigned int op_num, unsigned int number_of_vectors)
3141 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3142 stmt_vec_info stmt_vinfo = stmts[0];
3143 gimple *stmt = stmt_vinfo->stmt;
3144 unsigned HOST_WIDE_INT nunits;
3145 tree vec_cst;
3146 unsigned j, number_of_places_left_in_vector;
3147 tree vector_type;
3148 tree vop;
3149 int group_size = stmts.length ();
3150 unsigned int vec_num, i;
3151 unsigned number_of_copies = 1;
3152 vec<tree> voprnds;
3153 voprnds.create (number_of_vectors);
3154 bool constant_p, is_store;
3155 tree neutral_op = NULL;
3156 enum tree_code code = gimple_expr_code (stmt);
3157 gimple_seq ctor_seq = NULL;
3158 auto_vec<tree, 16> permute_results;
3160 /* Check if vector type is a boolean vector. */
3161 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3162 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3163 vector_type
3164 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3165 else
3166 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3168 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3170 is_store = true;
3171 op = gimple_assign_rhs1 (stmt);
3173 else
3174 is_store = false;
3176 gcc_assert (op);
3178 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3179 created vectors. It is greater than 1 if unrolling is performed.
3181 For example, we have two scalar operands, s1 and s2 (e.g., group of
3182 strided accesses of size two), while NUNITS is four (i.e., four scalars
3183 of this type can be packed in a vector). The output vector will contain
3184 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3185 will be 2).
3187 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3188 containing the operands.
3190 For example, NUNITS is four as before, and the group size is 8
3191 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3192 {s5, s6, s7, s8}. */
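/* Putting numbers to the two cases above: with NUNITS 4, GROUP_SIZE 2
   and NUMBER_OF_VECTORS 1, NUMBER_OF_COPIES is 4 * 1 / 2 == 2, giving
   {s1, s2, s1, s2}; with GROUP_SIZE 8 and NUMBER_OF_VECTORS 2 it is
   4 * 2 / 8 == 1.  */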
3194 /* When using duplicate_and_interleave, we just need one element for
3195 each scalar statement. */
3196 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3197 nunits = group_size;
3199 number_of_copies = nunits * number_of_vectors / group_size;
3201 number_of_places_left_in_vector = nunits;
3202 constant_p = true;
3203 tree_vector_builder elts (vector_type, nunits, 1);
3204 elts.quick_grow (nunits);
3205 bool place_after_defs = false;
3206 for (j = 0; j < number_of_copies; j++)
3208 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3210 stmt = stmt_vinfo->stmt;
3211 if (is_store)
3212 op = gimple_assign_rhs1 (stmt);
3213 else
3215 switch (code)
3217 case COND_EXPR:
3219 tree cond = gimple_assign_rhs1 (stmt);
3220 if (TREE_CODE (cond) == SSA_NAME)
3221 op = gimple_op (stmt, op_num + 1);
3222 else if (op_num == 0 || op_num == 1)
3223 op = TREE_OPERAND (cond, op_num);
3224 else
3226 if (op_num == 2)
3227 op = gimple_assign_rhs2 (stmt);
3228 else
3229 op = gimple_assign_rhs3 (stmt);
3232 break;
3234 case CALL_EXPR:
3235 op = gimple_call_arg (stmt, op_num);
3236 break;
3238 case LSHIFT_EXPR:
3239 case RSHIFT_EXPR:
3240 case LROTATE_EXPR:
3241 case RROTATE_EXPR:
3242 op = gimple_op (stmt, op_num + 1);
3243 /* Unlike the other binary operators, shifts/rotates have
3244 the shift count being int, instead of the same type as
3245 the lhs, so make sure the scalar is the right type if
3246 we are dealing with vectors of
3247 long long/long/short/char. */
3248 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3249 op = fold_convert (TREE_TYPE (vector_type), op);
3250 break;
3252 default:
3253 op = gimple_op (stmt, op_num + 1);
3254 break;
3258 /* Create 'vect_ = {op0,op1,...,opn}'. */
3259 number_of_places_left_in_vector--;
3260 tree orig_op = op;
3261 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3263 if (CONSTANT_CLASS_P (op))
3265 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3267 /* Can't use VIEW_CONVERT_EXPR for booleans because
3268 of possibly different sizes of scalar value and
3269 vector element. */
3270 if (integer_zerop (op))
3271 op = build_int_cst (TREE_TYPE (vector_type), 0);
3272 else if (integer_onep (op))
3273 op = build_all_ones_cst (TREE_TYPE (vector_type));
3274 else
3275 gcc_unreachable ();
3277 else
3278 op = fold_unary (VIEW_CONVERT_EXPR,
3279 TREE_TYPE (vector_type), op);
3280 gcc_assert (op && CONSTANT_CLASS_P (op));
3282 else
3284 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3285 gimple *init_stmt;
3286 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3288 tree true_val
3289 = build_all_ones_cst (TREE_TYPE (vector_type));
3290 tree false_val
3291 = build_zero_cst (TREE_TYPE (vector_type));
3292 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3293 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3294 op, true_val,
3295 false_val);
3297 else
3299 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3300 op);
3301 init_stmt
3302 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3303 op);
3305 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3306 op = new_temp;
3309 elts[number_of_places_left_in_vector] = op;
3310 if (!CONSTANT_CLASS_P (op))
3311 constant_p = false;
3312 if (TREE_CODE (orig_op) == SSA_NAME
3313 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3314 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3315 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3316 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3317 place_after_defs = true;
3319 if (number_of_places_left_in_vector == 0)
3321 if (constant_p
3322 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3323 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3324 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3325 else
3327 if (vec_oprnds->is_empty ())
3328 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3329 number_of_vectors,
3330 permute_results);
3331 vec_cst = permute_results[number_of_vectors - j - 1];
3333 tree init;
3334 gimple_stmt_iterator gsi;
3335 if (place_after_defs)
3337 stmt_vec_info last_stmt_info
3338 = vect_find_last_scalar_stmt_in_slp (slp_node);
3339 gsi = gsi_for_stmt (last_stmt_info->stmt);
3340 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3341 &gsi);
3343 else
3344 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3345 NULL);
3346 if (ctor_seq != NULL)
3348 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3349 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3350 ctor_seq = NULL;
3352 voprnds.quick_push (init);
3353 place_after_defs = false;
3354 number_of_places_left_in_vector = nunits;
3355 constant_p = true;
3356 elts.new_vector (vector_type, nunits, 1);
3357 elts.quick_grow (nunits);
3362 /* Since the vectors are created in reverse order, reverse them back
3363 here. */
3364 vec_num = voprnds.length ();
3365 for (j = vec_num; j != 0; j--)
3367 vop = voprnds[j - 1];
3368 vec_oprnds->quick_push (vop);
3371 voprnds.release ();
3373 /* If the VF is greater than the unrolling factor needed for the SLP
3374 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
3375 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3376 to replicate the vectors. */
3377 while (number_of_vectors > vec_oprnds->length ())
3379 tree neutral_vec = NULL;
3381 if (neutral_op)
3383 if (!neutral_vec)
3384 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3386 vec_oprnds->quick_push (neutral_vec);
3388 else
3390 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3391 vec_oprnds->quick_push (vop);
3397 /* Get the vectorized definitions from SLP_NODE, which contains the
3398 corresponding vectorized def-stmts. */
3400 static void
3401 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3403 tree vec_oprnd;
3404 stmt_vec_info vec_def_stmt_info;
3405 unsigned int i;
3407 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3409 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3411 gcc_assert (vec_def_stmt_info);
3412 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3413 vec_oprnd = gimple_phi_result (vec_def_phi);
3414 else
3415 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3416 vec_oprnds->quick_push (vec_oprnd);
3421 /* Get vectorized definitions for SLP_NODE.
3422 If the scalar definitions are loop invariants or constants, collect them and
3423 call vect_get_constant_vectors() to create vector stmts.
3424 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3425 must be stored in the corresponding child of SLP_NODE, and we call
3426 vect_get_slp_vect_defs () to retrieve them. */
3428 void
3429 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3430 vec<vec<tree> > *vec_oprnds)
3432 int number_of_vects = 0, i;
3433 unsigned int child_index = 0;
3434 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3435 slp_tree child = NULL;
3436 vec<tree> vec_defs;
3437 tree oprnd;
3438 bool vectorized_defs;
3440 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3441 FOR_EACH_VEC_ELT (ops, i, oprnd)
3443 /* For each operand we check if it has vectorized definitions in a child
3444 node or we need to create them (for invariants and constants). We
3445 check if the LHS of the first stmt of the next child matches OPRND.
3446 If it does, we found the correct child. Otherwise, we call
3447 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3448 to check this child node for the next operand. */
3449 vectorized_defs = false;
3450 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3452 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3454 /* We have to check both pattern and original def, if available. */
3455 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3457 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3458 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3459 tree first_def_op;
3461 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3462 first_def_op = gimple_phi_result (first_def);
3463 else
3464 first_def_op = gimple_get_lhs (first_def_info->stmt);
3465 if (operand_equal_p (oprnd, first_def_op, 0)
3466 || (related
3467 && operand_equal_p (oprnd,
3468 gimple_get_lhs (related->stmt), 0)))
3470 /* The number of vector defs is determined by the number of
3471 vector statements in the node from which we get those
3472 statements. */
3473 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3474 vectorized_defs = true;
3475 child_index++;
3478 else
3479 child_index++;
3482 if (!vectorized_defs)
3484 if (i == 0)
3486 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3487 /* Number of vector stmts was calculated according to LHS in
3488 vect_schedule_slp_instance (), fix it by replacing LHS with
3489 RHS, if necessary. See vect_get_smallest_scalar_type () for
3490 details. */
3491 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3492 &rhs_size_unit);
3493 if (rhs_size_unit != lhs_size_unit)
3495 number_of_vects *= rhs_size_unit;
3496 number_of_vects /= lhs_size_unit;
3501 /* Allocate memory for vectorized defs. */
3502 vec_defs = vNULL;
3503 vec_defs.create (number_of_vects);
3505 /* For reduction defs we call vect_get_constant_vectors (), since we are
3506 looking for initial loop invariant values. */
3507 if (vectorized_defs)
3508 /* The defs are already vectorized. */
3509 vect_get_slp_vect_defs (child, &vec_defs);
3510 else
3511 /* Build vectors from scalar defs. */
3512 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3513 number_of_vects);
3515 vec_oprnds->quick_push (vec_defs);
3519 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3520 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3521 permute statements for the SLP node NODE of the SLP instance
3522 SLP_NODE_INSTANCE. */
3524 bool
3525 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3526 gimple_stmt_iterator *gsi, poly_uint64 vf,
3527 slp_instance slp_node_instance, bool analyze_only,
3528 unsigned *n_perms)
3530 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3531 vec_info *vinfo = stmt_info->vinfo;
3532 int vec_index = 0;
3533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3534 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3535 unsigned int mask_element;
3536 machine_mode mode;
3538 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3539 return false;
3541 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3543 mode = TYPE_MODE (vectype);
3544 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3546 /* Initialize the vect stmts of NODE to properly insert the generated
3547 stmts later. */
3548 if (! analyze_only)
3549 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3550 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3551 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3553 /* Generate permutation masks for every NODE. Number of masks for each NODE
3554 is equal to GROUP_SIZE.
3555 E.g., we have a group of three nodes with three loads from the same
3556 location in each node, and the vector size is 4. I.e., we have an
3557 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3558 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3559 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3562 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3563 The last mask is illegal since we assume two operands for a permute
3564 operation, and the mask element values can't be outside that range.
3565 Hence, the last mask must be converted into {2,5,5,5}.
3566 For the first two permutations we need the first and the second input
3567 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3568 we need the second and the third vectors: {b1,c1,a2,b2} and
3569 {c2,a3,b3,c3}. */
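/* Relating those masks to the index computation below: for the a's,
   DR_GROUP_SIZE is 3 and every load permutation entry is 0, so element
   J maps to I = (J / 3) * 3.  With four-element vectors the first four
   J values give I = 0, 0, 0, 3, all in vector 0, hence the mask
   {0,0,0,3}; the next four give I = 3, 3, 6, 6, spanning two vectors,
   hence {3,3,6,6}.  */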
3571 int vect_stmts_counter = 0;
3572 unsigned int index = 0;
3573 int first_vec_index = -1;
3574 int second_vec_index = -1;
3575 bool noop_p = true;
3576 *n_perms = 0;
3578 vec_perm_builder mask;
3579 unsigned int nelts_to_build;
3580 unsigned int nvectors_per_build;
3581 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3582 && multiple_p (nunits, group_size));
3583 if (repeating_p)
3585 /* A single vector contains a whole number of copies of the node, so:
3586 (a) all permutes can use the same mask; and
3587 (b) the permutes only need a single vector input. */
3588 mask.new_vector (nunits, group_size, 3);
3589 nelts_to_build = mask.encoded_nelts ();
3590 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3592 else
3594 /* We need to construct a separate mask for each vector statement. */
3595 unsigned HOST_WIDE_INT const_nunits, const_vf;
3596 if (!nunits.is_constant (&const_nunits)
3597 || !vf.is_constant (&const_vf))
3598 return false;
3599 mask.new_vector (const_nunits, const_nunits, 1);
3600 nelts_to_build = const_vf * group_size;
3601 nvectors_per_build = 1;
3604 unsigned int count = mask.encoded_nelts ();
3605 mask.quick_grow (count);
3606 vec_perm_indices indices;
3608 for (unsigned int j = 0; j < nelts_to_build; j++)
3610 unsigned int iter_num = j / group_size;
3611 unsigned int stmt_num = j % group_size;
3612 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3613 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3614 if (repeating_p)
3616 first_vec_index = 0;
3617 mask_element = i;
3619 else
3621 /* Enforced before the loop when !repeating_p. */
3622 unsigned int const_nunits = nunits.to_constant ();
3623 vec_index = i / const_nunits;
3624 mask_element = i % const_nunits;
3625 if (vec_index == first_vec_index
3626 || first_vec_index == -1)
3628 first_vec_index = vec_index;
3630 else if (vec_index == second_vec_index
3631 || second_vec_index == -1)
3633 second_vec_index = vec_index;
3634 mask_element += const_nunits;
3636 else
3638 if (dump_enabled_p ())
3639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3640 "permutation requires at "
3641 "least three vectors %G",
3642 stmt_info->stmt);
3643 gcc_assert (analyze_only);
3644 return false;
3647 gcc_assert (mask_element < 2 * const_nunits);
3650 if (mask_element != index)
3651 noop_p = false;
3652 mask[index++] = mask_element;
3654 if (index == count && !noop_p)
3656 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3657 if (!can_vec_perm_const_p (mode, indices))
3659 if (dump_enabled_p ())
3661 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3662 vect_location,
3663 "unsupported vect permute { ");
3664 for (i = 0; i < count; ++i)
3666 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3667 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3669 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3671 gcc_assert (analyze_only);
3672 return false;
3675 ++*n_perms;

      if (index == count)
        {
          if (!analyze_only)
            {
              tree mask_vec = NULL_TREE;

              if (! noop_p)
                mask_vec = vect_gen_perm_mask_checked (vectype, indices);

              if (second_vec_index == -1)
                second_vec_index = first_vec_index;

              for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
                {
                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index + ri];
                  tree second_vec = dr_chain[second_vec_index + ri];
                  stmt_vec_info perm_stmt_info;
                  if (! noop_p)
                    {
                      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      gassign *perm_stmt
                        = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                               first_vec, second_vec,
                                               mask_vec);
                      perm_stmt_info
                        = vect_finish_stmt_generation (stmt_info, perm_stmt,
                                                       gsi);
                    }
                  else
                    /* If the mask was NULL_TREE, generate the requested
                       identity transform.  */
                    perm_stmt_info = vinfo->lookup_def (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
                    = perm_stmt_info;
                }
            }

          index = 0;
          first_vec_index = -1;
          second_vec_index = -1;
          noop_p = true;
        }
    }

  return true;
}
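
/* As an illustration, a two-input permute built above for const_nunits == 4
   takes the gimple form

     vect_dest = VEC_PERM_EXPR <first_vec, second_vec, { 1, 5, 2, 6 }>;

   where mask elements 4..7 select from SECOND_VEC (the mask values here are
   made up for the example).  */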

/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
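  /* For instance, if a later SLP instance contains a node with the same
     scalar statements, the lookup above finds this node in BST_MAP and
     splices its already-created vector statements instead of emitting a
     second copy.  */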
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }
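  /* The temporary override makes operand lookup during code generation for
     this node see the external or constant def-type of such children; it is
     undone by the restore loop at the end of this function.  */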

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "------>vectorizing SLP node starting from: %G",
                     stmt_info->stmt);

  /* Vectorized stmts go before the last scalar stmt, which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
        {
          gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
          if (gimple_assign_rhs_code (ostmt) != code0)
            {
              mask.quick_push (1);
              ocode = gimple_assign_rhs_code (ostmt);
            }
          else
            mask.quick_push (0);
        }
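      /* E.g. for the group {a0 + b0, a1 - b1, a2 + b2, a3 - b3}, CODE0 is
         PLUS_EXPR, OCODE ends up as MINUS_EXPR and MASK becomes
         {0, 1, 0, 1}.  */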
      if (ocode != ERROR_MARK)
        {
          vec<stmt_vec_info> v0;
          vec<stmt_vec_info> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt_info, &si, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt_info, &si, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();
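              /* With group_size == 2, const_nunits == 4 and mask == {0, 1},
                 TMASK is {0, 5, 2, 7}: even lanes come from V0 (CODE0) and
                 odd lanes from V1 (OCODE), i.e. the classic addsub lane
                 selection.  */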

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets'
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]->stmt),
                                           gimple_assign_lhs (v1[j]->stmt),
                                           tmask);
              SLP_TREE_VEC_STMTS (node).quick_push
                (vect_finish_stmt_generation (stmt_info, vstmt, &si));
            }
          v0.release ();
          v1.release ();
          return;
        }
    }
  vect_transform_stmt (stmt_info, &si, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
        continue;
      if (is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
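
/* E.g. a pure-SLP scalar call lhs_1 = __builtin_sqrt (x_1) is rewritten to
   lhs_1 = 0.0 above; the now-dead assignment is expected to be removed by a
   later DCE pass.  */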

/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                  instance, bst_map);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (store_info))
            break;

          store_info = vect_orig_stmt (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);