[official-gcc.git] / gcc / tree-vect-slp.c
blob bc81b3d4b046d2421dcfd055631f076423f444cb
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
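
/* As an illustrative sketch (the stmts are made up): for a group of two
   isomorphic scalar stmts

       a0 = b0 + c0;
       a1 = b1 + c1;

   the new node records {a0 = b0 + c0, a1 = b1 + c1} as its scalar stmts
   and reserves nops == 2 child slots, one for the future node defining
   {b0, b1} and one for the node defining {c0, c1}.  Each stmt's
   STMT_VINFO_NUM_SLP_USES is bumped so vect_free_slp_tree can tell when
   a shared stmt leaves its last SLP tree.  */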
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
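
/* Schematically, for the hypothetical group {a0 = b0 + c0, a1 = b1 + c1}
   there is one _slp_oprnd_info per operand position: oprnds_info[0]
   collects the def stmts of {b0, b1} and oprnds_info[1] those of
   {c0, c1}; first_dt, first_op_type and the pattern flags cache what was
   seen for the first stmt so later stmts can be checked against it.  */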
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
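
/* For example, in a hypothetical interleaving chain s0 -> s1 -> s2 starting
   at FIRST_STMT with GROUP_GAP (s1) == 1 and GROUP_GAP (s2) == 2 (one
   element skipped), the place of s2 is 1 + 2 == 3: the accumulated gaps
   give each stmt's element index relative to the first access.  */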
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}
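
/* A small worked example (hypothetical stmts): given the group

       a0 = b0 + c0;	<-- first stmt, sets first_dt per operand
       a1 = c1 + b1;	<-- defs appear in the opposite order

   the first operand of a1 mismatches oprnd_info[0], so because PLUS_EXPR
   is commutative the "goto again" path retries with swapped == true and,
   on success, swap_ssa_operands canonicalizes a1 to b1 + c1 in the IL.
   The swap is refused when STMT_VINFO_NUM_SLP_USES says another SLP
   instance has already committed to the current operand order.  */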
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, unsigned int *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

  return true;
}
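
/* For instance, under basic-block vectorization a group of two ints with
   a V4SI vector type would need unrolling (4 > 2) and is rejected here,
   while for loop vectorization the same situation merely records
   max_nunits == 4 and leaves the unrolling factor to the callers.  */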
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)
	{
	  unsigned int elt = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    elt += count;
	  sel.quick_push (elt);
	}
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
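
/* To illustrate the two-operator path with a made-up group:

       a0 = b0 + c0;
       a1 = b1 - c1;

   first_stmt_code is PLUS_EXPR and alt_stmt_code becomes MINUS_EXPR;
   both vector operations are emitted later and merged with a permute
   that selects lane i from the PLUS result for matching stmts and lane
   count + i from the MINUS result otherwise, which is why can_vec_perm_p
   must approve that mask before *two_operators is set.  */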
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

static hash_set <vec <gimple *>, bst_traits> *bst_fail;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
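
/* A sketch of the memoization effect: if a stmt sequence {s0, s1} failed
   to build once, any later attempt for the same sequence (e.g. while
   re-trying with swapped operands at an outer level) hits bst_fail and
   returns NULL immediately instead of redoing the recursive build, which
   is what keeps the overall build from going exponential.  */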
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree; *MATCHES records at which stmts of the
   group a mismatch was found.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ???  Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (unrolling_factor == 1
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
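
/* Concretely (a made-up case): if every load node of a reduction carries
   the permutation {1, 0, 3, 2}, the scalar stmts of all nodes are
   reordered by that permutation so the loads then appear in memory
   order; the recorded load permutations can subsequently be dropped, or
   reset to the identity where gap handling still requires them.  */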
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ???  The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  unsigned test_vf
    = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			       (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last store in SLP INSTANCE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
		 redundancies as well as loads that are later dead.  */
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + nunits - 1) / nunits);
	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ???  No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ???  We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
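
/* As a worked example of the load-copy adjustment above (numbers are
   invented): with GROUP_SIZE == 8, nunits == 4 and a load permutation
   touching only elements {0, 1, 2, 3}, the second vector covering
   elements 4..7 is never used, so ncopies_for_cost drops from 2 to 1
   before the load cost is recorded.  */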
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
					   (TREE_TYPE (gimple_assign_rhs1 (stmt))));
	  break;
	default:;
	}
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
1901 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1902 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1903 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1904 containing the remainder.
1905 Return the first stmt in the second group. */
1907 static gimple *
1908 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1910 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1911 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1912 gcc_assert (group1_size > 0);
1913 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1914 gcc_assert (group2_size > 0);
1915 GROUP_SIZE (first_vinfo) = group1_size;
1917 gimple *stmt = first_stmt;
1918 for (unsigned i = group1_size; i > 1; i--)
1920 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1921 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1923 /* STMT is now the last element of the first group. */
1924 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1925 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1927 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1928 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1930 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1931 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1934 /* For the second group, the GROUP_GAP is that before the original group,
1935 plus skipping over the first group. */
1936 GROUP_GAP (vinfo_for_stmt (group2)) =
1937 GROUP_GAP (first_vinfo) + group1_size;
1939 /* GROUP_GAP of the first group now has to skip over the second group too. */
1940 GROUP_GAP (first_vinfo) += group2_size;
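/* For illustration (sizes assumed): splitting a gapless group of 6
   stores at GROUP1_SIZE 4 leaves the first group with GROUP_SIZE 4,
   GROUP_GAP 0 + 2 = 2 and the second with GROUP_SIZE 2, GROUP_GAP
   0 + 4 = 4, so both groups still advance by the original 6
   elements per iteration.  */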
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1944 group1_size, group2_size);
1946 return group2;
1949 /* Analyze an SLP instance starting from a group of grouped stores. Call
1950 vect_build_slp_tree to build a tree of packed stmts if possible.
1951 Return FALSE if it's impossible to SLP any stmt in the loop. */
1953 static bool
1954 vect_analyze_slp_instance (vec_info *vinfo,
1955 gimple *stmt, unsigned max_tree_size)
1957 slp_instance new_instance;
1958 slp_tree node;
1959 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1960 unsigned int unrolling_factor = 1, nunits;
1961 tree vectype, scalar_type = NULL_TREE;
1962 gimple *next;
1963 unsigned int i;
1964 unsigned int max_nunits = 0;
1965 vec<slp_tree> loads;
1966 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1967 vec<gimple *> scalar_stmts;
1969 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1971 if (dr)
1973 scalar_type = TREE_TYPE (DR_REF (dr));
1974 vectype = get_vectype_for_scalar_type (scalar_type);
1976 else
1978 gcc_assert (is_a <loop_vec_info> (vinfo));
1979 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1982 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1984 else
1986 gcc_assert (is_a <loop_vec_info> (vinfo));
1987 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1988 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1991 if (!vectype)
1993 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1996 "Build SLP failed: unsupported data-type ");
1997 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1998 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2001 return false;
2003 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2005 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2006 scalar_stmts.create (group_size);
2007 next = stmt;
2008 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2010 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2011 while (next)
2013 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2014 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2015 scalar_stmts.safe_push (
2016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2017 else
2018 scalar_stmts.safe_push (next);
2019 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2021 /* Mark the first element of the reduction chain as reduction to properly
2022 transform the node. In the reduction analysis phase only the last
2023 element of the chain is marked as reduction. */
2024 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2025 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2027 else
2029 /* Collect reduction statements. */
2030 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2031 for (i = 0; reductions.iterate (i, &next); i++)
2032 scalar_stmts.safe_push (next);
2035 loads.create (group_size);
2037 /* Build the tree for the SLP instance. */
2038 bool *matches = XALLOCAVEC (bool, group_size);
2039 unsigned npermutes = 0;
2040 bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
2041 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2042 &max_nunits, &loads, matches, &npermutes,
2043 NULL, max_tree_size);
2044 delete bst_fail;
2045 if (node != NULL)
2047 /* Calculate the unrolling factor based on the smallest type. */
2048 unrolling_factor
2049 = least_common_multiple (max_nunits, group_size) / group_size;
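/* For illustration (sizes assumed): a group of 2 stmts with a
   largest vector type of 4 units gives
   least_common_multiple (4, 2) / 2 = 2, i.e. two copies of the
   group would be needed to fill a vector, which basic-block SLP
   below cannot provide.  */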
2051 if (unrolling_factor != 1
2052 && is_a <bb_vec_info> (vinfo))
2055 if (max_nunits > group_size)
2057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2058 "Build SLP failed: store group "
2059 "size not a multiple of the vector size "
2060 "in basic block SLP\n");
2061 vect_free_slp_tree (node);
2062 loads.release ();
2063 return false;
2065 /* Fatal mismatch. */
2066 matches[group_size/max_nunits * max_nunits] = false;
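/* GROUP_SIZE / MAX_NUNITS * MAX_NUNITS rounds the group size down
   to a multiple of the vector size, so this marks the first element
   of the trailing partial vector as the mismatch point for the
   group-splitting code below.  */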
2067 vect_free_slp_tree (node);
2068 loads.release ();
2070 else
2072 /* Create a new SLP instance. */
2073 new_instance = XNEW (struct _slp_instance);
2074 SLP_INSTANCE_TREE (new_instance) = node;
2075 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2076 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2077 SLP_INSTANCE_LOADS (new_instance) = loads;
2079 /* Compute the load permutation. */
2080 slp_tree load_node;
2081 bool loads_permuted = false;
2082 FOR_EACH_VEC_ELT (loads, i, load_node)
2084 vec<unsigned> load_permutation;
2085 int j;
2086 gimple *load, *first_stmt;
2087 bool this_load_permuted = false;
2088 load_permutation.create (group_size);
2089 first_stmt = GROUP_FIRST_ELEMENT
2090 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2091 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2093 int load_place = vect_get_place_in_interleaving_chain
2094 (load, first_stmt);
2095 gcc_assert (load_place != -1);
2096 if (load_place != j)
2097 this_load_permuted = true;
2098 load_permutation.safe_push (load_place);
2100 if (!this_load_permuted
2101 /* The load requires permutation when unrolling exposes
2102 a gap either because the group is larger than the SLP
2103 group-size or because there is a gap between the groups. */
2104 && (unrolling_factor == 1
2105 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2106 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2108 load_permutation.release ();
2109 continue;
2111 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2112 loads_permuted = true;
2115 if (loads_permuted)
2117 if (!vect_supported_load_permutation_p (new_instance))
2119 if (dump_enabled_p ())
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2122 "Build SLP failed: unsupported load "
2123 "permutation ");
2124 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2125 TDF_SLIM, stmt, 0);
2127 vect_free_slp_instance (new_instance);
2128 return false;
2132 /* If the loads and stores can be handled with load/store-lanes
2133 instructions do not generate this SLP instance. */
2134 if (is_a <loop_vec_info> (vinfo)
2135 && loads_permuted
2136 && dr && vect_store_lanes_supported (vectype, group_size))
2138 slp_tree load_node;
2139 FOR_EACH_VEC_ELT (loads, i, load_node)
2141 gimple *first_stmt = GROUP_FIRST_ELEMENT
2142 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2143 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2144 /* Use SLP for strided accesses (or if we
2145 can't load-lanes). */
2146 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2147 || ! vect_load_lanes_supported
2148 (STMT_VINFO_VECTYPE (stmt_vinfo),
2149 GROUP_SIZE (stmt_vinfo)))
2150 break;
2152 if (i == loads.length ())
2154 if (dump_enabled_p ())
2155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2156 "Built SLP cancelled: can use "
2157 "load/store-lanes\n");
2158 vect_free_slp_instance (new_instance);
2159 return false;
2163 vinfo->slp_instances.safe_push (new_instance);
2165 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_NOTE, vect_location,
2168 "Final SLP tree for instance:\n");
2169 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2172 return true;
2175 else
2177 /* Failed to SLP. */
2178 /* Free the allocated memory. */
2179 scalar_stmts.release ();
2180 loads.release ();
2183 /* For basic block SLP, try to break the group up into multiples of the
2184 vector size. */
2185 if (is_a <bb_vec_info> (vinfo)
2186 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2187 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2189 /* We consider breaking the group only on VF boundaries from the existing
2190 start. */
2191 for (i = 0; i < group_size; i++)
2192 if (!matches[i]) break;
2194 if (i >= nunits && i < group_size)
2196 /* Split into two groups at the first vector boundary before i. */
2197 gcc_assert ((nunits & (nunits - 1)) == 0);
2198 unsigned group1_size = i & ~(nunits - 1);
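/* This rounds I down to a multiple of NUNITS, relying on NUNITS
   being a power of two (asserted above); e.g. a first mismatch at
   I 5 with NUNITS 4 gives GROUP1_SIZE 4 (values assumed for
   illustration).  */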
2200 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2201 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2202 /* If the first non-match was in the middle of a vector,
2203 skip the rest of that vector. */
2204 if (group1_size < i)
2206 i = group1_size + nunits;
2207 if (i < group_size)
2208 rest = vect_split_slp_store_group (rest, nunits);
2210 if (i < group_size)
2211 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2212 return res;
2214 /* Even though the first vector did not all match, we might be able to SLP
2215 (some of) the remainder. FORNOW ignore this possibility. */
2218 return false;
2222 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2223 trees of packed scalar stmts if SLP is possible. */
2225 bool
2226 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2228 unsigned int i;
2229 gimple *first_element;
2231 if (dump_enabled_p ())
2232 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2234 /* Find SLP sequences starting from groups of grouped stores. */
2235 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2236 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2238 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2240 if (loop_vinfo->reduction_chains.length () > 0)
2242 /* Find SLP sequences starting from reduction chains. */
2243 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2244 if (! vect_analyze_slp_instance (vinfo, first_element,
2245 max_tree_size))
2247 /* Dissolve reduction chain group. */
2248 gimple *next, *stmt = first_element;
2249 while (stmt)
2251 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2252 next = GROUP_NEXT_ELEMENT (vinfo);
2253 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2254 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2255 stmt = next;
2257 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2258 = vect_internal_def;
2262 /* Find SLP sequences starting from groups of reductions. */
2263 if (loop_vinfo->reductions.length () > 1)
2264 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2265 max_tree_size);
2268 return true;
2272 /* For each possible SLP instance decide whether to SLP it and calculate the
2273 overall unrolling factor needed to SLP the loop. Return TRUE if we decided to SLP at
2274 least one instance. */
2276 bool
2277 vect_make_slp_decision (loop_vec_info loop_vinfo)
2279 unsigned int i, unrolling_factor = 1;
2280 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2281 slp_instance instance;
2282 int decided_to_slp = 0;
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2286 "\n");
2288 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2290 /* FORNOW: SLP if you can. */
2291 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2292 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2294 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2295 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2296 loop-based vectorization. Such stmts will be marked as HYBRID. */
2297 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2298 decided_to_slp++;
2301 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2303 if (decided_to_slp && dump_enabled_p ())
2304 dump_printf_loc (MSG_NOTE, vect_location,
2305 "Decided to SLP %d instances. Unrolling factor %d\n",
2306 decided_to_slp, unrolling_factor);
2308 return (decided_to_slp > 0);
2312 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2313 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2315 static void
2316 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2318 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2319 imm_use_iterator imm_iter;
2320 gimple *use_stmt;
2321 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2322 slp_tree child;
2323 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2324 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2325 int j;
2327 /* Propagate hybrid down the SLP tree. */
2328 if (stype == hybrid)
2330 else if (HYBRID_SLP_STMT (stmt_vinfo))
2331 stype = hybrid;
2332 else
2334 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2335 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2336 /* If we get a pattern stmt here we have to use the LHS of the
2337 original stmt for immediate uses. */
2338 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2339 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2340 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2341 tree def;
2342 if (gimple_code (stmt) == GIMPLE_PHI)
2343 def = gimple_phi_result (stmt);
2344 else
2345 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2346 if (def)
2347 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2349 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2350 continue;
2351 use_vinfo = vinfo_for_stmt (use_stmt);
2352 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2353 && STMT_VINFO_RELATED_STMT (use_vinfo))
2354 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2355 if (!STMT_SLP_TYPE (use_vinfo)
2356 && (STMT_VINFO_RELEVANT (use_vinfo)
2357 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2358 && !(gimple_code (use_stmt) == GIMPLE_PHI
2359 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2361 if (dump_enabled_p ())
2363 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2364 "def in non-SLP stmt: ");
2365 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2367 stype = hybrid;
2372 if (stype == hybrid
2373 && !HYBRID_SLP_STMT (stmt_vinfo))
2375 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2378 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2380 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2383 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2384 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2385 vect_detect_hybrid_slp_stmts (child, i, stype);
2388 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2390 static tree
2391 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2393 walk_stmt_info *wi = (walk_stmt_info *)data;
2394 struct loop *loopp = (struct loop *)wi->info;
2396 if (wi->is_lhs)
2397 return NULL_TREE;
2399 if (TREE_CODE (*tp) == SSA_NAME
2400 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2402 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2403 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2404 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2406 if (dump_enabled_p ())
2408 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2409 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2411 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2415 return NULL_TREE;
2418 static tree
2419 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2420 walk_stmt_info *)
2422 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2423 /* If the stmt is in an SLP instance then this isn't a reason
2424 to mark use definitions in other SLP instances as hybrid. */
2425 if (! STMT_SLP_TYPE (use_vinfo)
2426 && (STMT_VINFO_RELEVANT (use_vinfo)
2427 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2428 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2429 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2431 else
2432 *handled = true;
2433 return NULL_TREE;
2436 /* Find stmts that must be both vectorized and SLPed. */
2438 void
2439 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2441 unsigned int i;
2442 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2443 slp_instance instance;
2445 if (dump_enabled_p ())
2446 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2447 "\n");
2449 /* First walk all pattern stmts in the loop and mark defs of uses as
2450 hybrid because immediate uses in them are not recorded. */
2451 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2453 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2454 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2455 gsi_next (&gsi))
2457 gimple *stmt = gsi_stmt (gsi);
2458 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2459 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2461 walk_stmt_info wi;
2462 memset (&wi, 0, sizeof (wi));
2463 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2464 gimple_stmt_iterator gsi2
2465 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2466 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2467 vect_detect_hybrid_slp_1, &wi);
2468 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2469 vect_detect_hybrid_slp_2,
2470 vect_detect_hybrid_slp_1, &wi);
2475 /* Then walk the SLP instance trees marking stmts with uses in
2476 non-SLP stmts as hybrid, also propagating hybrid down the
2477 SLP tree, collecting the above info on-the-fly. */
2478 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2480 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2481 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2482 i, pure_slp);
2487 /* Initialize a bb_vec_info struct for the statements between
2488 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2490 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2491 gimple_stmt_iterator region_end_in)
2492 : vec_info (vec_info::bb, init_cost (NULL)),
2493 bb (gsi_bb (region_begin_in)),
2494 region_begin (region_begin_in),
2495 region_end (region_end_in)
2497 gimple_stmt_iterator gsi;
2499 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2500 gsi_next (&gsi))
2502 gimple *stmt = gsi_stmt (gsi);
2503 gimple_set_uid (stmt, 0);
2504 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2507 bb->aux = this;
2511 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2512 stmts in the basic block. */
2514 _bb_vec_info::~_bb_vec_info ()
2516 for (gimple_stmt_iterator si = region_begin;
2517 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2519 gimple *stmt = gsi_stmt (si);
2520 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2522 if (stmt_info)
2523 /* Free stmt_vec_info. */
2524 free_stmt_vec_info (stmt);
2526 /* Reset region marker. */
2527 gimple_set_uid (stmt, -1);
2530 bb->aux = NULL;
2534 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2535 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2537 Return true if the operations are supported. */
2539 static bool
2540 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2541 slp_instance node_instance)
2543 bool dummy;
2544 int i, j;
2545 gimple *stmt;
2546 slp_tree child;
2548 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2549 return true;
2551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2552 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2553 return false;
2555 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2556 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2557 gcc_assert (stmt_info);
2558 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2560 /* For BB vectorization vector types are assigned here.
2561 Memory accesses already got their vector type assigned
2562 in vect_analyze_data_refs. */
2563 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2564 if (bb_vinfo
2565 && ! STMT_VINFO_DATA_REF (stmt_info))
2567 gcc_assert (PURE_SLP_STMT (stmt_info));
2569 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2570 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_NOTE, vect_location,
2573 "get vectype for scalar type: ");
2574 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2575 dump_printf (MSG_NOTE, "\n");
2578 tree vectype = get_vectype_for_scalar_type (scalar_type);
2579 if (!vectype)
2581 if (dump_enabled_p ())
2583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2584 "not SLPed: unsupported data-type ");
2585 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2586 scalar_type);
2587 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2589 return false;
2592 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2595 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2596 dump_printf (MSG_NOTE, "\n");
2599 gimple *sstmt;
2600 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2601 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2604 /* Calculate the number of vector statements to be created for the
2605 scalar stmts in this node. For SLP reductions it is equal to the
2606 number of vector statements in the children (which has already been
2607 calculated by the recursive call). Otherwise it is the number of
2608 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2609 VF divided by the number of elements in a vector. */
2610 if (GROUP_FIRST_ELEMENT (stmt_info)
2611 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2612 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2613 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2614 else
2616 int vf;
2617 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2618 vf = loop_vinfo->vectorization_factor;
2619 else
2620 vf = 1;
2621 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2623 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2624 = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype);
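/* For illustration (values assumed): VF 4 and a group of 2 ints
   with V4SI vectors give 4 * 2 / 4 = 2 vector stmts for this
   node.  */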
2627 /* Push SLP node def-type to stmt operands. */
2628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2629 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2630 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2631 = SLP_TREE_DEF_TYPE (child);
2632 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2633 /* Restore def-types. */
2634 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2635 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2636 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2637 = vect_internal_def;
2638 if (! res)
2639 return false;
2641 return true;
2645 /* Analyze statements in SLP instances of VINFO. Return true if the
2646 operations are supported. */
2648 bool
2649 vect_slp_analyze_operations (vec_info *vinfo)
2651 slp_instance instance;
2652 int i;
2654 if (dump_enabled_p ())
2655 dump_printf_loc (MSG_NOTE, vect_location,
2656 "=== vect_slp_analyze_operations ===\n");
2658 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2660 if (!vect_slp_analyze_node_operations (vinfo,
2661 SLP_INSTANCE_TREE (instance),
2662 instance))
2664 dump_printf_loc (MSG_NOTE, vect_location,
2665 "removing SLP instance operations starting from: ");
2666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2667 SLP_TREE_SCALAR_STMTS
2668 (SLP_INSTANCE_TREE (instance))[0], 0);
2669 vect_free_slp_instance (instance);
2670 vinfo->slp_instances.ordered_remove (i);
2672 else
2674 /* Compute the costs of the SLP instance. */
2675 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2676 i++;
2680 return !vinfo->slp_instances.is_empty ();
2684 /* Compute the scalar cost of the SLP node NODE and its children
2685 and return it. Do not account defs that are marked in LIFE and
2686 update LIFE according to uses of NODE. */
2688 static unsigned
2689 vect_bb_slp_scalar_cost (basic_block bb,
2690 slp_tree node, vec<bool, va_heap> *life)
2692 unsigned scalar_cost = 0;
2693 unsigned i;
2694 gimple *stmt;
2695 slp_tree child;
2697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2699 unsigned stmt_cost;
2700 ssa_op_iter op_iter;
2701 def_operand_p def_p;
2702 stmt_vec_info stmt_info;
2704 if ((*life)[i])
2705 continue;
2707 /* If there is a non-vectorized use of the defs then the scalar
2708 stmt is kept live in which case we do not account it or any
2709 required defs in the SLP children in the scalar cost. This
2710 way we make the vectorization more costly when compared to
2711 the scalar cost. */
2712 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2714 imm_use_iterator use_iter;
2715 gimple *use_stmt;
2716 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2717 if (!is_gimple_debug (use_stmt)
2718 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2719 use_stmt)
2720 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2722 (*life)[i] = true;
2723 BREAK_FROM_IMM_USE_STMT (use_iter);
2726 if ((*life)[i])
2727 continue;
2729 /* Count scalar stmts only once. */
2730 if (gimple_visited_p (stmt))
2731 continue;
2732 gimple_set_visited (stmt, true);
2734 stmt_info = vinfo_for_stmt (stmt);
2735 if (STMT_VINFO_DATA_REF (stmt_info))
2737 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2738 stmt_cost = vect_get_stmt_cost (scalar_load);
2739 else
2740 stmt_cost = vect_get_stmt_cost (scalar_store);
2742 else
2743 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2745 scalar_cost += stmt_cost;
2748 auto_vec<bool, 20> subtree_life;
2749 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2751 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2753 /* Do not directly pass LIFE to the recursive call, copy it to
2754 confine changes in the callee to the current child/subtree. */
2755 subtree_life.safe_splice (*life);
2756 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2757 subtree_life.truncate (0);
2761 return scalar_cost;
2764 /* Check if vectorization of the basic block is profitable. */
2766 static bool
2767 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2769 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2770 slp_instance instance;
2771 int i;
2772 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2773 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2775 /* Calculate scalar cost. */
2776 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2778 auto_vec<bool, 20> life;
2779 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2780 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2781 SLP_INSTANCE_TREE (instance),
2782 &life);
2785 /* Unset visited flag. */
2786 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2787 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2788 gimple_set_visited (gsi_stmt (gsi), false);
2790 /* Complete the target-specific cost calculation. */
2791 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2792 &vec_inside_cost, &vec_epilogue_cost);
2794 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2796 if (dump_enabled_p ())
2798 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2799 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2800 vec_inside_cost);
2801 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2802 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2803 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2806 /* Vectorization is profitable if its cost is no more than the cost of the
2807 scalar version. Note that we err on the vector side for equal cost because
2808 the cost estimate is otherwise quite pessimistic (constant uses are
2809 free on the scalar side but cost a load on the vector side for
2810 example). */
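/* For illustration (costs assumed): inside cost 8 plus outside
   cost 2 against a scalar cost of 10 passes the check below and
   the block is vectorized.  */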
2811 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2812 return false;
2814 return true;
2817 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2818 if so and sets fatal to true if failure is independent of
2819 current_vector_size. */
2821 static bb_vec_info
2822 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2823 gimple_stmt_iterator region_end,
2824 vec<data_reference_p> datarefs, int n_stmts,
2825 bool &fatal)
2827 bb_vec_info bb_vinfo;
2828 slp_instance instance;
2829 int i;
2830 int min_vf = 2;
2832 /* The first group of checks is independent of the vector size. */
2833 fatal = true;
2835 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2839 "not vectorized: too many instructions in "
2840 "basic block.\n");
2841 free_data_refs (datarefs);
2842 return NULL;
2845 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2846 if (!bb_vinfo)
2847 return NULL;
2849 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2851 /* Analyze the data references. */
2853 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2855 if (dump_enabled_p ())
2856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2857 "not vectorized: unhandled data-ref in basic "
2858 "block.\n");
2860 delete bb_vinfo;
2861 return NULL;
2864 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2868 "not vectorized: not enough data-refs in "
2869 "basic block.\n");
2871 delete bb_vinfo;
2872 return NULL;
2875 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2877 if (dump_enabled_p ())
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2879 "not vectorized: unhandled data access in "
2880 "basic block.\n");
2882 delete bb_vinfo;
2883 return NULL;
2886 /* If there are no grouped stores in the region there is no need
2887 to continue with pattern recog as vect_analyze_slp will fail
2888 anyway. */
2889 if (bb_vinfo->grouped_stores.is_empty ())
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2893 "not vectorized: no grouped stores in "
2894 "basic block.\n");
2896 delete bb_vinfo;
2897 return NULL;
2900 /* The checks above were independent of the vector size; the rest of the analysis below depends on it in some way, so failures are no longer fatal. */
2901 fatal = false;
2903 vect_pattern_recog (bb_vinfo);
2905 /* Check the SLP opportunities in the basic block, analyze and build SLP
2906 trees. */
2907 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2909 if (dump_enabled_p ())
2911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2912 "Failed to SLP the basic block.\n");
2913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2914 "not vectorized: failed to find SLP opportunities "
2915 "in basic block.\n");
2918 delete bb_vinfo;
2919 return NULL;
2922 vect_record_base_alignments (bb_vinfo);
2924 /* Analyze and verify the alignment of data references and the
2925 dependence in the SLP instances. */
2926 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2928 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2929 || ! vect_slp_analyze_instance_dependence (instance))
2931 dump_printf_loc (MSG_NOTE, vect_location,
2932 "removing SLP instance operations starting from: ");
2933 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2934 SLP_TREE_SCALAR_STMTS
2935 (SLP_INSTANCE_TREE (instance))[0], 0);
2936 vect_free_slp_instance (instance);
2937 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2938 continue;
2941 /* Mark all the statements that we want to vectorize as pure SLP and
2942 relevant. */
2943 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2944 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2946 i++;
2948 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2950 delete bb_vinfo;
2951 return NULL;
2954 if (!vect_slp_analyze_operations (bb_vinfo))
2956 if (dump_enabled_p ())
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2958 "not vectorized: bad operation in basic block.\n");
2960 delete bb_vinfo;
2961 return NULL;
2964 /* Cost model: check if the vectorization is worthwhile. */
2965 if (!unlimited_cost_model (NULL)
2966 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2968 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2970 "not vectorized: vectorization is not "
2971 "profitable.\n");
2973 delete bb_vinfo;
2974 return NULL;
2977 if (dump_enabled_p ())
2978 dump_printf_loc (MSG_NOTE, vect_location,
2979 "Basic block will be vectorized using SLP\n");
2981 return bb_vinfo;
2985 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2986 true if anything in the basic-block was vectorized. */
2988 bool
2989 vect_slp_bb (basic_block bb)
2991 bb_vec_info bb_vinfo;
2992 gimple_stmt_iterator gsi;
2993 unsigned int vector_sizes;
2994 bool any_vectorized = false;
2996 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2999 /* Autodetect first vector size we try. */
3000 current_vector_size = 0;
3001 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3003 gsi = gsi_start_bb (bb);
3005 while (1)
3007 if (gsi_end_p (gsi))
3008 break;
3010 gimple_stmt_iterator region_begin = gsi;
3011 vec<data_reference_p> datarefs = vNULL;
3012 int insns = 0;
3014 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3016 gimple *stmt = gsi_stmt (gsi);
3017 if (is_gimple_debug (stmt))
3018 continue;
3019 insns++;
3021 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3022 vect_location = gimple_location (stmt);
3024 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3025 break;
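/* At this point [REGION_BEGIN, GSI) is the maximal sub-region whose
   data references find_data_references_in_stmt could handle; GSI is
   either the end of the block or the first offending stmt.  */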
3028 /* Skip leading unhandled stmts. */
3029 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3031 gsi_next (&gsi);
3032 continue;
3035 gimple_stmt_iterator region_end = gsi;
3037 bool vectorized = false;
3038 bool fatal = false;
3039 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3040 datarefs, insns, fatal);
3041 if (bb_vinfo
3042 && dbg_cnt (vect_slp))
3044 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3047 vect_schedule_slp (bb_vinfo);
3049 if (dump_enabled_p ())
3050 dump_printf_loc (MSG_NOTE, vect_location,
3051 "basic block part vectorized\n");
3053 vectorized = true;
3055 delete bb_vinfo;
3057 any_vectorized |= vectorized;
3059 vector_sizes &= ~current_vector_size;
3060 if (vectorized
3061 || vector_sizes == 0
3062 || current_vector_size == 0
3063 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3064 vector sizes will fail do not bother iterating. */
3065 || fatal)
3067 if (gsi_end_p (region_end))
3068 break;
3070 /* Skip the unhandled stmt. */
3071 gsi_next (&gsi);
3073 /* And reset vector sizes. */
3074 current_vector_size = 0;
3075 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3077 else
3079 /* Try the next biggest vector size. */
3080 current_vector_size = 1 << floor_log2 (vector_sizes);
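/* VECTOR_SIZES is a bitmask of the remaining candidate sizes in
   bytes and 1 << floor_log2 picks the largest one; e.g. with 16
   and 32 byte vectors left the mask is 48 and the retry uses 32
   (sizes assumed for illustration).  */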
3081 if (dump_enabled_p ())
3082 dump_printf_loc (MSG_NOTE, vect_location,
3083 "***** Re-trying analysis with "
3084 "vector size %d\n", current_vector_size);
3086 /* Start over. */
3087 gsi = region_begin;
3091 return any_vectorized;
3095 /* Return true if the vector type of the boolean constant that is the OPNUM
3096 operand of statement STMT is a boolean vector. */
3098 static bool
3099 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3101 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3102 enum tree_code code = gimple_expr_code (stmt);
3103 tree op, vectype;
3104 gimple *def_stmt;
3105 enum vect_def_type dt;
3107 /* For comparison and COND_EXPR type is chosen depending
3108 on the other comparison operand. */
3109 if (TREE_CODE_CLASS (code) == tcc_comparison)
3111 if (opnum)
3112 op = gimple_assign_rhs1 (stmt);
3113 else
3114 op = gimple_assign_rhs2 (stmt);
3116 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3117 &dt, &vectype))
3118 gcc_unreachable ();
3120 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3123 if (code == COND_EXPR)
3125 tree cond = gimple_assign_rhs1 (stmt);
3127 if (TREE_CODE (cond) == SSA_NAME)
3128 op = cond;
3129 else if (opnum)
3130 op = TREE_OPERAND (cond, 1);
3131 else
3132 op = TREE_OPERAND (cond, 0);
3134 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3135 &dt, &vectype))
3136 gcc_unreachable ();
3138 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3141 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3145 /* For constant and loop invariant defs of SLP_NODE this function returns
3146 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3147 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3148 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3152 static void
3153 vect_get_constant_vectors (tree op, slp_tree slp_node,
3154 vec<tree> *vec_oprnds,
3155 unsigned int op_num, unsigned int number_of_vectors)
3157 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3158 gimple *stmt = stmts[0];
3159 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3160 unsigned nunits;
3161 tree vec_cst;
3162 unsigned j, number_of_places_left_in_vector;
3163 tree vector_type;
3164 tree vop;
3165 int group_size = stmts.length ();
3166 unsigned int vec_num, i;
3167 unsigned number_of_copies = 1;
3168 vec<tree> voprnds;
3169 voprnds.create (number_of_vectors);
3170 bool constant_p, is_store;
3171 tree neutral_op = NULL;
3172 enum tree_code code = gimple_expr_code (stmt);
3173 gimple_seq ctor_seq = NULL;
3175 /* Check if vector type is a boolean vector. */
3176 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3177 && vect_mask_constant_operand_p (stmt, op_num))
3178 vector_type
3179 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3180 else
3181 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3182 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3184 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3186 is_store = true;
3187 op = gimple_assign_rhs1 (stmt);
3189 else
3190 is_store = false;
3192 gcc_assert (op);
3194 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3195 created vectors. It is greater than 1 if unrolling is performed.
3197 For example, we have two scalar operands, s1 and s2 (e.g., group of
3198 strided accesses of size two), while NUNITS is four (i.e., four scalars
3199 of this type can be packed in a vector). The output vector will contain
3200 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3201 will be 2).
3203 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3204 containing the operands.
3206 For example, NUNITS is four as before, and the group size is 8
3207 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3208 {s5, s6, s7, s8}. */
3210 number_of_copies = nunits * number_of_vectors / group_size;
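/* Applying the formula to the two examples above: 4 * 1 / 2 = 2
   copies for {s1, s2} and 4 * 2 / 8 = 1 copy for {s1, ..., s8}.  */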
3212 number_of_places_left_in_vector = nunits;
3213 constant_p = true;
3214 auto_vec<tree, 32> elts (nunits);
3215 elts.quick_grow (nunits);
3216 bool place_after_defs = false;
3217 for (j = 0; j < number_of_copies; j++)
3219 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3221 if (is_store)
3222 op = gimple_assign_rhs1 (stmt);
3223 else
3225 switch (code)
3227 case COND_EXPR:
3229 tree cond = gimple_assign_rhs1 (stmt);
3230 if (TREE_CODE (cond) == SSA_NAME)
3231 op = gimple_op (stmt, op_num + 1);
3232 else if (op_num == 0 || op_num == 1)
3233 op = TREE_OPERAND (cond, op_num);
3234 else
3236 if (op_num == 2)
3237 op = gimple_assign_rhs2 (stmt);
3238 else
3239 op = gimple_assign_rhs3 (stmt);
3242 break;
3244 case CALL_EXPR:
3245 op = gimple_call_arg (stmt, op_num);
3246 break;
3248 case LSHIFT_EXPR:
3249 case RSHIFT_EXPR:
3250 case LROTATE_EXPR:
3251 case RROTATE_EXPR:
3252 op = gimple_op (stmt, op_num + 1);
3253 /* Unlike the other binary operators, shifts/rotates have
3254 the shift count being int, instead of the same type as
3255 the lhs, so make sure the scalar is the right type if
3256 we are dealing with vectors of
3257 long long/long/short/char. */
3258 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3259 op = fold_convert (TREE_TYPE (vector_type), op);
3260 break;
3262 default:
3263 op = gimple_op (stmt, op_num + 1);
3264 break;
3268 /* Create 'vect_ = {op0,op1,...,opn}'. */
3269 number_of_places_left_in_vector--;
3270 tree orig_op = op;
3271 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3273 if (CONSTANT_CLASS_P (op))
3275 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3277 /* Can't use VIEW_CONVERT_EXPR for booleans because
3278 of possibly different sizes of scalar value and
3279 vector element. */
3280 if (integer_zerop (op))
3281 op = build_int_cst (TREE_TYPE (vector_type), 0);
3282 else if (integer_onep (op))
3283 op = build_all_ones_cst (TREE_TYPE (vector_type));
3284 else
3285 gcc_unreachable ();
3287 else
3288 op = fold_unary (VIEW_CONVERT_EXPR,
3289 TREE_TYPE (vector_type), op);
3290 gcc_assert (op && CONSTANT_CLASS_P (op));
3292 else
3294 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3295 gimple *init_stmt;
3296 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3298 tree true_val
3299 = build_all_ones_cst (TREE_TYPE (vector_type));
3300 tree false_val
3301 = build_zero_cst (TREE_TYPE (vector_type));
3302 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3303 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3304 op, true_val,
3305 false_val);
3307 else
3309 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3310 op);
3311 init_stmt
3312 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3313 op);
3315 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3316 op = new_temp;
3319 elts[number_of_places_left_in_vector] = op;
3320 if (!CONSTANT_CLASS_P (op))
3321 constant_p = false;
3322 if (TREE_CODE (orig_op) == SSA_NAME
3323 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3324 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3325 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3326 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3327 place_after_defs = true;
3329 if (number_of_places_left_in_vector == 0)
3331 if (constant_p)
3332 vec_cst = build_vector (vector_type, elts);
3333 else
3335 vec<constructor_elt, va_gc> *v;
3336 unsigned k;
3337 vec_alloc (v, nunits);
3338 for (k = 0; k < nunits; ++k)
3339 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3340 vec_cst = build_constructor (vector_type, v);
3342 tree init;
3343 gimple_stmt_iterator gsi;
3344 if (place_after_defs)
3346 gsi = gsi_for_stmt
3347 (vect_find_last_scalar_stmt_in_slp (slp_node));
3348 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3350 else
3351 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3352 if (ctor_seq != NULL)
3354 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3355 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3356 GSI_SAME_STMT);
3357 ctor_seq = NULL;
3359 voprnds.quick_push (init);
3360 place_after_defs = false;
3361 number_of_places_left_in_vector = nunits;
3362 constant_p = true;
3367 /* Since the vectors are created in the reverse order, we have to reverse
3368 them. */
3369 vec_num = voprnds.length ();
3370 for (j = vec_num; j != 0; j--)
3372 vop = voprnds[j - 1];
3373 vec_oprnds->quick_push (vop);
3376 voprnds.release ();
3378 /* In case that VF is greater than the unrolling factor needed for the SLP
3379 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3380 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3381 to replicate the vectors. */
3382 while (number_of_vectors > vec_oprnds->length ())
3384 tree neutral_vec = NULL;
3386 if (neutral_op)
3388 if (!neutral_vec)
3389 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3391 vec_oprnds->quick_push (neutral_vec);
3393 else
3395 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3396 vec_oprnds->quick_push (vop);
3402 /* Get vectorized definitions from SLP_NODE that contains corresponding
3403 vectorized def-stmts. */
3405 static void
3406 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3408 tree vec_oprnd;
3409 gimple *vec_def_stmt;
3410 unsigned int i;
3412 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3414 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3416 gcc_assert (vec_def_stmt);
3417 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3418 vec_oprnd = gimple_phi_result (vec_def_stmt);
3419 else
3420 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3421 vec_oprnds->quick_push (vec_oprnd);
3426 /* Get vectorized definitions for SLP_NODE.
3427 If the scalar definitions are loop invariants or constants, collect them and
3428 call vect_get_constant_vectors() to create vector stmts.
3429 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3430 must be stored in the corresponding child of SLP_NODE, and we call
3431 vect_get_slp_vect_defs () to retrieve them. */
3433 void
3434 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3435 vec<vec<tree> > *vec_oprnds)
3437 gimple *first_stmt;
3438 int number_of_vects = 0, i;
3439 unsigned int child_index = 0;
3440 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3441 slp_tree child = NULL;
3442 vec<tree> vec_defs;
3443 tree oprnd;
3444 bool vectorized_defs;
3446 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3447 FOR_EACH_VEC_ELT (ops, i, oprnd)
3449 /* For each operand we check if it has vectorized definitions in a child
3450 node or we need to create them (for invariants and constants). We
3451 check if the LHS of the first stmt of the next child matches OPRND.
3452 If it does, we found the correct child. Otherwise, we call
3453 vect_get_constant_vectors () and do not advance CHILD_INDEX in order
3454 to check this child node for the next operand. */
3455 vectorized_defs = false;
3456 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3458 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3460 /* We have to check both pattern and original def, if available. */
3461 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3463 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3464 gimple *related
3465 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3466 tree first_def_op;
3468 if (gimple_code (first_def) == GIMPLE_PHI)
3469 first_def_op = gimple_phi_result (first_def);
3470 else
3471 first_def_op = gimple_get_lhs (first_def);
3472 if (operand_equal_p (oprnd, first_def_op, 0)
3473 || (related
3474 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3476 /* The number of vector defs is determined by the number of
3477 vector statements in the node from which we get those
3478 statements. */
3479 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3480 vectorized_defs = true;
3481 child_index++;
3484 else
3485 child_index++;
3488 if (!vectorized_defs)
3490 if (i == 0)
3492 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3493 /* Number of vector stmts was calculated according to LHS in
3494 vect_schedule_slp_instance (), fix it by replacing LHS with
3495 RHS, if necessary. See vect_get_smallest_scalar_type () for
3496 details. */
3497 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3498 &rhs_size_unit);
3499 if (rhs_size_unit != lhs_size_unit)
3501 number_of_vects *= rhs_size_unit;
3502 number_of_vects /= lhs_size_unit;
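/* For illustration (types assumed): a group of 8 int = (int) short
   conversions needs two V4SI vectors for the LHS but only one V8HI
   vector of RHS defs, 2 * 2 / 4 = 1.  */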
3507 /* Allocate memory for vectorized defs. */
3508 vec_defs = vNULL;
3509 vec_defs.create (number_of_vects);
3511 /* For reduction defs we call vect_get_constant_vectors (), since we are
3512 looking for initial loop invariant values. */
3513 if (vectorized_defs)
3514 /* The defs are already vectorized. */
3515 vect_get_slp_vect_defs (child, &vec_defs);
3516 else
3517 /* Build vectors from scalar defs. */
3518 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3519 number_of_vects);
3521 vec_oprnds->quick_push (vec_defs);
3525 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3526 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3527 permute statements for the SLP node NODE of the SLP instance
3528 SLP_NODE_INSTANCE. */
3530 bool
3531 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3532 gimple_stmt_iterator *gsi, int vf,
3533 slp_instance slp_node_instance, bool analyze_only,
3534 unsigned *n_perms)
3536 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3537 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3538 tree mask_element_type = NULL_TREE, mask_type;
3539 int nunits, vec_index = 0;
3540 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3541 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3542 int mask_element;
3543 machine_mode mode;
3545 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3546 return false;
3548 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3550 mode = TYPE_MODE (vectype);
3552 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3553 same size as the vector element being permuted. */
3554 mask_element_type = lang_hooks.types.type_for_mode
3555 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3556 mask_type = get_vectype_for_scalar_type (mask_element_type);
3557 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3558 auto_vec_perm_indices mask (nunits);
3559 mask.quick_grow (nunits);
3561 /* Initialize the vect stmts of NODE to properly insert the generated
3562 stmts later. */
3563 if (! analyze_only)
3564 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3565 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3566 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3568 /* Generate permutation masks for every NODE. Number of masks for each NODE
3569 is equal to GROUP_SIZE.
3570 E.g., we have a group of three nodes with three loads from the same
3571 location in each node, and the vector size is 4. I.e., we have an
3572 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3573 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3574 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3577 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3578 The last mask is illegal since we assume two operands for the permute
3579 operation, and the mask element values can't be outside that range.
3580 Hence, the last mask must be converted into {2,5,5,5}.
3581 For the first two permutations we need the first and the second input
3582 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3583 we need the second and the third vectors: {b1,c1,a2,b2} and
3584 {c2,a3,b3,c3}. */
3586 int vect_stmts_counter = 0;
3587 int index = 0;
3588 int first_vec_index = -1;
3589 int second_vec_index = -1;
3590 bool noop_p = true;
3591 *n_perms = 0;
3593 for (int j = 0; j < vf; j++)
3595 for (int k = 0; k < group_size; k++)
3597 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3598 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3599 vec_index = i / nunits;
3600 mask_element = i % nunits;
3601 if (vec_index == first_vec_index
3602 || first_vec_index == -1)
3604 first_vec_index = vec_index;
3606 else if (vec_index == second_vec_index
3607 || second_vec_index == -1)
3609 second_vec_index = vec_index;
3610 mask_element += nunits;
3612 else
3614 if (dump_enabled_p ())
3616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3617 "permutation requires at "
3618 "least three vectors ");
3619 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3620 stmt, 0);
3622 gcc_assert (analyze_only);
3623 return false;
3626 gcc_assert (mask_element >= 0
3627 && mask_element < 2 * nunits);
3628 if (mask_element != index)
3629 noop_p = false;
3630 mask[index++] = mask_element;
3632 if (index == nunits)
3634 if (! noop_p
3635 && ! can_vec_perm_p (mode, false, &mask))
3637 if (dump_enabled_p ())
3639 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3640 vect_location,
3641 "unsupported vect permute { ");
3642 for (int i = 0; i < nunits; ++i)
3643 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3644 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3646 gcc_assert (analyze_only);
3647 return false;
3650 if (! noop_p)
3651 ++*n_perms;
3653 if (!analyze_only)
3655 tree mask_vec = NULL_TREE;
3657 if (! noop_p)
3659 auto_vec<tree, 32> mask_elts (nunits);
3660 for (int l = 0; l < nunits; ++l)
3661 mask_elts.quick_push (build_int_cst (mask_element_type,
3662 mask[l]));
3663 mask_vec = build_vector (mask_type, mask_elts);
3666 if (second_vec_index == -1)
3667 second_vec_index = first_vec_index;
3669 /* Generate the permute statement if necessary. */
3670 tree first_vec = dr_chain[first_vec_index];
3671 tree second_vec = dr_chain[second_vec_index];
3672 gimple *perm_stmt;
3673 if (! noop_p)
3675 tree perm_dest
3676 = vect_create_destination_var (gimple_assign_lhs (stmt),
3677 vectype);
3678 perm_dest = make_ssa_name (perm_dest);
3679 perm_stmt = gimple_build_assign (perm_dest,
3680 VEC_PERM_EXPR,
3681 first_vec, second_vec,
3682 mask_vec);
3683 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3685 else
3686 /* If mask was NULL_TREE generate the requested
3687 identity transform. */
3688 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3690 /* Store the vector statement in NODE. */
3691 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3694 index = 0;
3695 first_vec_index = -1;
3696 second_vec_index = -1;
3697 noop_p = true;
3702 return true;
3707 /* Vectorize SLP instance tree in postorder. */
3709 static bool
3710 vect_schedule_slp_instance (slp_tree node, slp_instance instance)
3712 gimple *stmt;
3713 bool grouped_store, is_store;
3714 gimple_stmt_iterator si;
3715 stmt_vec_info stmt_info;
3716 unsigned int group_size;
3717 tree vectype;
3718 int i, j;
3719 slp_tree child;
3721 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3722 return false;
3724 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3725 vect_schedule_slp_instance (child, instance);
3727 /* Push SLP node def-type to stmts. */
3728 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3729 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3730 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3731 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt, which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      auto_vec_perm_indices mask (group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
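
      /* E.g. (illustrative) for the group { a0 + b0, a1 - b1, a2 + b2,
	 a3 - b3 } CODE0 is PLUS_EXPR, OCODE ends up as MINUS_EXPR and
	 MASK becomes { 0, 1, 0, 1 }, one flag per scalar stmt recording
	 which of the two operations it performs.  */
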
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  /* Vectorize the group once with CODE0 and once with OCODE,
	     saving the two sets of vector stmts in V0 and V1.  */
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
	      auto_vec<tree, 32> melts (nunits);
	      for (l = 0; l < nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
		  melts.quick_push (t);
		}
	      tmask = build_vector (mvectype, melts);
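
	      /* For the illustrative group above with a V4SF vectype this
		 builds the selector { 0, 5, 2, 7 } from MASK { 0, 1, 0, 1 }:
		 indices below nunits select from V0[j] (the PLUS_EXPR
		 vector), the others from V1[j] (the MINUS_EXPR vector),
		 yielding { a0+b0, a1-b1, a2+b2, a3-b3 }, i.e. the addsub
		 pattern the comment below refers to.  */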

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets'
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}


/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
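      /* Transfer the stmt_vec_info to the replacement stmt and swap the
	 call out of the IL.  The scalar lhs is dead after vectorization,
	 so the zero value merely keeps the IL valid until dead code
	 elimination removes the assignment.  */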
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}


/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);
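
      /* Remove the scalar stores of this instance; unlike the calls
	 above they have a vdef and are not removed by DCE.  For a store
	 instance the root's scalar stmts form the store group, while a
	 root without a data reference (e.g. a reduction instance) makes
	 the loop below stop immediately.  */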
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);
	}
    }

  return is_store;
}