gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
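
/* Note that STMT_VINFO_NUM_SLP_USES acts as a reference count: it is
   incremented here for every scalar stmt put into a node and decremented
   again in vect_free_slp_tree.  A non-zero count on a stmt means some SLP
   node has already committed to that stmt's current operand order (see
   the swap checks in vect_get_and_check_slp_defs below).  */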

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
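
/* For example (illustration only): for an interleaving chain of four
   consecutive loads a[0], a[1], a[2], a[3], each element after the first
   has GROUP_GAP 1, so querying the load of a[2] walks the chain
   accumulating 1 + 1 and returns place 2.  */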

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to a non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}
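
/* Illustration of the operand-swapping fallback above (hypothetical
   group, with S defined outside the vectorized region and T0/T1 inside):
       x0 = t0 + s;
       x1 = s + t1;
   For the second stmt, operand zero has def-type vect_external_def but
   oprnd_info recorded vect_internal_def from the first stmt, so the loop
   restarts at 'again' with SWAPPED set, reads rhs2/rhs1 in reverse order,
   and finally commits the swap to the GIMPLE statement itself via
   swap_ssa_operands.  */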

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, unsigned int *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

  return true;
}
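
/* Illustration (assuming a 128-bit vector target): a group whose
   narrowest scalar type is 'int' records max_nunits 4 (V4SI), while a
   'short' operand elsewhere in the same tree records 8 (V8HI);
   *MAX_NUNITS keeps the largest such count, from which the unrolling
   factor of the instance is later derived.  */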

/* Verify that the scalar stmts STMTS are isomorphic, do not require any
   data permutation, and are of supported types of operation.  Return
   true if so, otherwise return false and indicate in *MATCHES which
   stmts are not isomorphic to the first one.  If MATCHES[0] is false
   then this indicates the comparison could not be carried out or the
   stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example; it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)
	{
	  unsigned int elt = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    elt += count;
	  sel.quick_push (elt);
	}
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
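
/* Illustration of the two-operator case above (hypothetical group):
       a[0] = b[0] + c[0];
       a[1] = b[1] - c[1];
   is accepted with first_stmt_code PLUS_EXPR and alt_stmt_code
   MINUS_EXPR, provided can_vec_perm_p confirms the target supports the
   blend; the permute mask built above selects element I from the
   first-operation vector when stmt I uses first_stmt_code and from the
   alternate vector (offset by COUNT) otherwise.  */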

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

static hash_set <vec <gimple *>, bst_traits> *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
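
/* The copy made with x.splice above is deliberate: the table owns its
   keys and releases the vectors through bst_traits at destruction time,
   so the caller's STMTS vector must not be handed over directly.  */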

/* Recursively build an SLP tree for the group of scalar stmts STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require
   data permutation, or are of unsupported types of operation.
   Otherwise return the built SLP node; *MATCHES records which stmts
   failed to match the first one.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of unsuccessful permutes was not
	     more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
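
/* Note on the retry logic above: the GF_PLF_1 pass-local flag marks the
   stmts whose operands were already swapped in the IL, so a stmt that
   appears more than once in STMTS is not swapped twice; the checking
   loop then asserts that exactly the mismatched stmts were swapped.  */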

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
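
/* For example (illustration only), with PERMUTATION {1, 0, 3, 2} the
   stmt at index 0 moves to position 1 and vice versa, likewise indexes
   2 and 3: tmp_stmts[permutation[i]] = stmt places each stmt at its new
   position rather than fetching from it.  */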

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (unrolling_factor == 1
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
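
/* For example (illustration only), a reduction whose SLP group loads
   a[2*i + 1] before a[2*i] yields load permutation {1, 0}; since the
   order of reduction stmts is irrelevant, the whole instance is
   rearranged to match the memory order and the permutation can usually
   be dropped, except where gaps or unrolling require keeping it.  */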

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  unsigned n_perms;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load
	     (node, vNULL, NULL,
	      SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
	      &n_perms))
      return false;

  return true;
}

/* Find the last scalar stmt in the SLP node NODE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
		 redundancies as well as loads that are later dead.  */
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + nunits - 1) / nunits);
	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
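
/* Illustration of the load-count adjustment above: with GROUP_SIZE 8,
   NUNITS 4 and a permutation referencing only elements 0..3, the bitmap
   marks just the first vector's lanes, so ncopies_for_cost drops to 1
   instead of the 2 vector loads a full group would need.  */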

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
					   (TREE_TYPE (gimple_assign_rhs1 (stmt))));
	  break;
	default:;
	}
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
1895 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1896 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1897 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1898 containing the remainder.
1899 Return the first stmt in the second group. */
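/* As an illustration with made-up sizes: splitting a group of 7 stores
   with GROUP1_SIZE == 4 produces groups of 4 and 3; the second group's
   GROUP_GAP becomes the original gap plus 4 and the first group's
   GROUP_GAP grows by 3, so each group skips over the other's stmts.  */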
1901 static gimple *
1902 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1904 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1905 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1906 gcc_assert (group1_size > 0);
1907 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1908 gcc_assert (group2_size > 0);
1909 GROUP_SIZE (first_vinfo) = group1_size;
1911 gimple *stmt = first_stmt;
1912 for (unsigned i = group1_size; i > 1; i--)
1914 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1915 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1917 /* STMT is now the last element of the first group. */
1918 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1919 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1921 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1922 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1924 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1925 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1928 /* For the second group, the GROUP_GAP is that before the original group,
1929 plus skipping over the first group. */
1930 GROUP_GAP (vinfo_for_stmt (group2)) =
1931 GROUP_GAP (first_vinfo) + group1_size;
1933 /* GROUP_GAP of the first group now has to skip over the second group too. */
1934 GROUP_GAP (first_vinfo) += group2_size;
1936 if (dump_enabled_p ())
1937 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1938 group1_size, group2_size);
1940 return group2;
1943 /* Analyze an SLP instance starting from a group of grouped stores. Call
1944 vect_build_slp_tree to build a tree of packed stmts if possible.
1945 Return FALSE if it's impossible to SLP any stmt in the loop. */
1947 static bool
1948 vect_analyze_slp_instance (vec_info *vinfo,
1949 gimple *stmt, unsigned max_tree_size)
1951 slp_instance new_instance;
1952 slp_tree node;
1953 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1954 unsigned int unrolling_factor = 1, nunits;
1955 tree vectype, scalar_type = NULL_TREE;
1956 gimple *next;
1957 unsigned int i;
1958 unsigned int max_nunits = 0;
1959 vec<slp_tree> loads;
1960 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1961 vec<gimple *> scalar_stmts;
1963 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1965 if (dr)
1967 scalar_type = TREE_TYPE (DR_REF (dr));
1968 vectype = get_vectype_for_scalar_type (scalar_type);
1970 else
1972 gcc_assert (is_a <loop_vec_info> (vinfo));
1973 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1976 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1978 else
1980 gcc_assert (is_a <loop_vec_info> (vinfo));
1981 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1982 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1985 if (!vectype)
1987 if (dump_enabled_p ())
1989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1990 "Build SLP failed: unsupported data-type ");
1991 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1992 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1995 return false;
1997 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1999 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2000 scalar_stmts.create (group_size);
2001 next = stmt;
2002 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2004 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2005 while (next)
2007 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2008 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2009 scalar_stmts.safe_push (
2010 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2011 else
2012 scalar_stmts.safe_push (next);
2013 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2015 /* Mark the first element of the reduction chain as reduction to properly
2016 transform the node. In the reduction analysis phase only the last
2017 element of the chain is marked as reduction. */
2018 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2019 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2021 else
2023 /* Collect reduction statements. */
2024 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2025 for (i = 0; reductions.iterate (i, &next); i++)
2026 scalar_stmts.safe_push (next);
2029 loads.create (group_size);
2031 /* Build the tree for the SLP instance. */
2032 bool *matches = XALLOCAVEC (bool, group_size);
2033 unsigned npermutes = 0;
2034 bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
2035 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2036 &max_nunits, &loads, matches, &npermutes,
2037 NULL, max_tree_size);
2038 delete bst_fail;
2039 if (node != NULL)
2041 /* Calculate the unrolling factor based on the smallest type. */
2042 unrolling_factor
2043 = least_common_multiple (max_nunits, group_size) / group_size;
2045 if (unrolling_factor != 1
2046 && is_a <bb_vec_info> (vinfo))
2049 if (max_nunits > group_size)
2051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2052 "Build SLP failed: store group "
2053 "size not a multiple of the vector size "
2054 "in basic block SLP\n");
2055 vect_free_slp_tree (node);
2056 loads.release ();
2057 return false;
2059 /* Fatal mismatch: force a split below at the last full-vector boundary. */
2060 matches[group_size/max_nunits * max_nunits] = false;
2061 vect_free_slp_tree (node);
2062 loads.release ();
2064 else
2066 /* Create a new SLP instance. */
2067 new_instance = XNEW (struct _slp_instance);
2068 SLP_INSTANCE_TREE (new_instance) = node;
2069 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2070 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2071 SLP_INSTANCE_LOADS (new_instance) = loads;
2073 /* Compute the load permutation. */
2074 slp_tree load_node;
2075 bool loads_permuted = false;
2076 FOR_EACH_VEC_ELT (loads, i, load_node)
2078 vec<unsigned> load_permutation;
2079 int j;
2080 gimple *load, *first_stmt;
2081 bool this_load_permuted = false;
2082 load_permutation.create (group_size);
2083 first_stmt = GROUP_FIRST_ELEMENT
2084 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2085 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2087 int load_place = vect_get_place_in_interleaving_chain
2088 (load, first_stmt);
2089 gcc_assert (load_place != -1);
2090 if (load_place != j)
2091 this_load_permuted = true;
2092 load_permutation.safe_push (load_place);
2094 if (!this_load_permuted
2095 /* The load requires permutation when unrolling exposes
2096 a gap either because the group is larger than the SLP
2097 group-size or because there is a gap between the groups. */
2098 && (unrolling_factor == 1
2099 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2100 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2102 load_permutation.release ();
2103 continue;
2105 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2106 loads_permuted = true;
2109 if (loads_permuted)
2111 if (!vect_supported_load_permutation_p (new_instance))
2113 if (dump_enabled_p ())
2115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2116 "Build SLP failed: unsupported load "
2117 "permutation ");
2118 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2119 TDF_SLIM, stmt, 0);
2121 vect_free_slp_instance (new_instance);
2122 return false;
2126 /* If the loads and stores can be handled with load/store-lanes
2127 instructions do not generate this SLP instance. */
2128 if (is_a <loop_vec_info> (vinfo)
2129 && loads_permuted
2130 && dr && vect_store_lanes_supported (vectype, group_size))
2132 slp_tree load_node;
2133 FOR_EACH_VEC_ELT (loads, i, load_node)
2135 gimple *first_stmt = GROUP_FIRST_ELEMENT
2136 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2137 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2138 /* Use SLP for strided accesses (or if we
2139 can't load-lanes). */
2140 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2141 || ! vect_load_lanes_supported
2142 (STMT_VINFO_VECTYPE (stmt_vinfo),
2143 GROUP_SIZE (stmt_vinfo)))
2144 break;
2146 if (i == loads.length ())
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150 "Built SLP cancelled: can use "
2151 "load/store-lanes\n");
2152 vect_free_slp_instance (new_instance);
2153 return false;
2157 vinfo->slp_instances.safe_push (new_instance);
2159 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_NOTE, vect_location,
2162 "Final SLP tree for instance:\n");
2163 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2166 return true;
2169 else
2171 /* Failed to SLP. */
2172 /* Free the allocated memory. */
2173 scalar_stmts.release ();
2174 loads.release ();
2177 /* For basic block SLP, try to break the group up into multiples of the
2178 vector size. */
2179 if (is_a <bb_vec_info> (vinfo)
2180 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2181 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2183 /* We consider breaking the group only on VF boundaries from the existing
2184 start. */
2185 for (i = 0; i < group_size; i++)
2186 if (!matches[i]) break;
2188 if (i >= nunits && i < group_size)
2190 /* Split into two groups at the first vector boundary before i. */
2191 gcc_assert ((nunits & (nunits - 1)) == 0);
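/* E.g. (made-up numbers): with NUNITS == 4 and the first mismatch at
   I == 5, the masking below yields GROUP1_SIZE == 4, the last vector
   boundary before I.  */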
2192 unsigned group1_size = i & ~(nunits - 1);
2194 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2195 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2196 /* If the first non-match was in the middle of a vector,
2197 skip the rest of that vector. */
2198 if (group1_size < i)
2200 i = group1_size + nunits;
2201 if (i < group_size)
2202 rest = vect_split_slp_store_group (rest, nunits);
2204 if (i < group_size)
2205 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2206 return res;
2208 /* Even though the first vector did not all match, we might be able to SLP
2209 (some) of the remainder. FORNOW ignore this possibility. */
2212 return false;
2216 /* Check if there are stmts in the loop that can be vectorized using SLP.
2217 Build SLP trees of packed scalar stmts if SLP is possible. */
2219 bool
2220 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2222 unsigned int i;
2223 gimple *first_element;
2225 if (dump_enabled_p ())
2226 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2228 /* Find SLP sequences starting from groups of grouped stores. */
2229 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2230 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2232 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2234 if (loop_vinfo->reduction_chains.length () > 0)
2236 /* Find SLP sequences starting from reduction chains. */
2237 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2238 if (! vect_analyze_slp_instance (vinfo, first_element,
2239 max_tree_size))
2241 /* Dissolve reduction chain group. */
2242 gimple *next, *stmt = first_element;
2243 while (stmt)
2245 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2246 next = GROUP_NEXT_ELEMENT (vinfo);
2247 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2248 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2249 stmt = next;
2251 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2252 = vect_internal_def;
2256 /* Find SLP sequences starting from groups of reductions. */
2257 if (loop_vinfo->reductions.length () > 1)
2258 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2259 max_tree_size);
2262 return true;
2266 /* For each possible SLP instance decide whether to SLP it and calculate the
2267 overall unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2268 least one instance. */
2270 bool
2271 vect_make_slp_decision (loop_vec_info loop_vinfo)
2273 unsigned int i, unrolling_factor = 1;
2274 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2275 slp_instance instance;
2276 int decided_to_slp = 0;
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2280 "\n");
2282 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2284 /* FORNOW: SLP if you can. */
2285 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2286 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2288 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2289 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2290 loop-based vectorization. Such stmts will be marked as HYBRID. */
2291 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2292 decided_to_slp++;
2295 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2297 if (decided_to_slp && dump_enabled_p ())
2298 dump_printf_loc (MSG_NOTE, vect_location,
2299 "Decided to SLP %d instances. Unrolling factor %d\n",
2300 decided_to_slp, unrolling_factor);
2302 return (decided_to_slp > 0);
2306 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2307 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2309 static void
2310 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2312 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2313 imm_use_iterator imm_iter;
2314 gimple *use_stmt;
2315 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2316 slp_tree child;
2317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2318 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2319 int j;
2321 /* Propagate hybrid down the SLP tree. */
2322 if (stype == hybrid)
2324 else if (HYBRID_SLP_STMT (stmt_vinfo))
2325 stype = hybrid;
2326 else
2328 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2329 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2330 /* If we get a pattern stmt here we have to use the LHS of the
2331 original stmt for immediate uses. */
2332 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2333 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2334 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2335 tree def;
2336 if (gimple_code (stmt) == GIMPLE_PHI)
2337 def = gimple_phi_result (stmt);
2338 else
2339 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2340 if (def)
2341 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2343 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2344 continue;
2345 use_vinfo = vinfo_for_stmt (use_stmt);
2346 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2347 && STMT_VINFO_RELATED_STMT (use_vinfo))
2348 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2349 if (!STMT_SLP_TYPE (use_vinfo)
2350 && (STMT_VINFO_RELEVANT (use_vinfo)
2351 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2352 && !(gimple_code (use_stmt) == GIMPLE_PHI
2353 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2355 if (dump_enabled_p ())
2357 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2358 "def in non-SLP stmt: ");
2359 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2361 stype = hybrid;
2366 if (stype == hybrid
2367 && !HYBRID_SLP_STMT (stmt_vinfo))
2369 if (dump_enabled_p ())
2371 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2372 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2374 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2377 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2378 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2379 vect_detect_hybrid_slp_stmts (child, i, stype);
2382 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2384 static tree
2385 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2387 walk_stmt_info *wi = (walk_stmt_info *)data;
2388 struct loop *loopp = (struct loop *)wi->info;
2390 if (wi->is_lhs)
2391 return NULL_TREE;
2393 if (TREE_CODE (*tp) == SSA_NAME
2394 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2396 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2397 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2398 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2400 if (dump_enabled_p ())
2402 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2403 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2405 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2409 return NULL_TREE;
2412 static tree
2413 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2414 walk_stmt_info *)
2416 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2417 /* If the stmt is in a SLP instance then this isn't a reason
2418 to mark use definitions in other SLP instances as hybrid. */
2419 if (! STMT_SLP_TYPE (use_vinfo)
2420 && (STMT_VINFO_RELEVANT (use_vinfo)
2421 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2422 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2423 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2425 else
2426 *handled = true;
2427 return NULL_TREE;
2430 /* Find stmts that must be both vectorized and SLPed. */
2432 void
2433 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2435 unsigned int i;
2436 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2437 slp_instance instance;
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2441 "\n");
2443 /* First walk all pattern stmts in the loop and mark defs of uses as
2444 hybrid because immediate uses in them are not recorded. */
2445 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2447 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2448 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2449 gsi_next (&gsi))
2451 gimple *stmt = gsi_stmt (gsi);
2452 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2453 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2455 walk_stmt_info wi;
2456 memset (&wi, 0, sizeof (wi));
2457 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2458 gimple_stmt_iterator gsi2
2459 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2460 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2461 vect_detect_hybrid_slp_1, &wi);
2462 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2463 vect_detect_hybrid_slp_2,
2464 vect_detect_hybrid_slp_1, &wi);
2469 /* Then walk the SLP instance trees marking stmts with uses in
2470 non-SLP stmts as hybrid, also propagating hybrid down the
2471 SLP tree, collecting the above info on-the-fly. */
2472 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2474 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2475 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2476 i, pure_slp);
2481 /* Initialize a bb_vec_info struct for the statements between
2482 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2484 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2485 gimple_stmt_iterator region_end_in)
2486 : vec_info (vec_info::bb, init_cost (NULL)),
2487 bb (gsi_bb (region_begin_in)),
2488 region_begin (region_begin_in),
2489 region_end (region_end_in)
2491 gimple_stmt_iterator gsi;
2493 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2494 gsi_next (&gsi))
2496 gimple *stmt = gsi_stmt (gsi);
2497 gimple_set_uid (stmt, 0);
2498 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2501 bb->aux = this;
2505 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2506 stmts in the basic block. */
2508 _bb_vec_info::~_bb_vec_info ()
2510 for (gimple_stmt_iterator si = region_begin;
2511 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2513 gimple *stmt = gsi_stmt (si);
2514 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2516 if (stmt_info)
2517 /* Free stmt_vec_info. */
2518 free_stmt_vec_info (stmt);
2520 /* Reset region marker. */
2521 gimple_set_uid (stmt, -1);
2524 bb->aux = NULL;
2528 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2529 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2531 Return true if the operations are supported. */
2533 static bool
2534 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2535 slp_instance node_instance)
2537 bool dummy;
2538 int i, j;
2539 gimple *stmt;
2540 slp_tree child;
2542 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2543 return true;
2545 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2546 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2547 return false;
2549 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2551 gcc_assert (stmt_info);
2552 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2554 /* For BB vectorization vector types are assigned here.
2555 Memory accesses already got their vector type assigned
2556 in vect_analyze_data_refs. */
2557 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2558 if (bb_vinfo
2559 && ! STMT_VINFO_DATA_REF (stmt_info))
2561 gcc_assert (PURE_SLP_STMT (stmt_info));
2563 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2564 if (dump_enabled_p ())
2566 dump_printf_loc (MSG_NOTE, vect_location,
2567 "get vectype for scalar type: ");
2568 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2569 dump_printf (MSG_NOTE, "\n");
2572 tree vectype = get_vectype_for_scalar_type (scalar_type);
2573 if (!vectype)
2575 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2578 "not SLPed: unsupported data-type ");
2579 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2580 scalar_type);
2581 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2583 return false;
2586 if (dump_enabled_p ())
2588 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2589 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2590 dump_printf (MSG_NOTE, "\n");
2593 gimple *sstmt;
2594 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2595 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2598 /* Calculate the number of vector statements to be created for the
2599 scalar stmts in this node. For SLP reductions it is equal to the
2600 number of vector statements in the children (which has already been
2601 calculated by the recursive call). Otherwise it is the number of
2602 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2603 VF divided by the number of elements in a vector. */
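/* E.g. (made-up numbers): with VF == 4, GROUP_SIZE == 2 and a
   four-element vector type this gives 4 * 2 / 4 == 2 vector stmts.  */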
2604 if (GROUP_FIRST_ELEMENT (stmt_info)
2605 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2606 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2607 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2608 else
2610 int vf;
2611 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2612 vf = loop_vinfo->vectorization_factor;
2613 else
2614 vf = 1;
2615 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2616 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2617 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2618 = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype);
2621 /* Push SLP node def-type to stmt operands. */
2622 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2623 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2624 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2625 = SLP_TREE_DEF_TYPE (child);
2626 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2627 /* Restore def-types. */
2628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2629 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2630 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2631 = vect_internal_def;
2632 if (! res)
2633 return false;
2635 return true;
2639 /* Analyze statements in SLP instances of VINFO. Return true if the
2640 operations are supported. */
2642 bool
2643 vect_slp_analyze_operations (vec_info *vinfo)
2645 slp_instance instance;
2646 int i;
2648 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_NOTE, vect_location,
2650 "=== vect_slp_analyze_operations ===\n");
2652 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2654 if (!vect_slp_analyze_node_operations (vinfo,
2655 SLP_INSTANCE_TREE (instance),
2656 instance))
2658 dump_printf_loc (MSG_NOTE, vect_location,
2659 "removing SLP instance operations starting from: ");
2660 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2661 SLP_TREE_SCALAR_STMTS
2662 (SLP_INSTANCE_TREE (instance))[0], 0);
2663 vect_free_slp_instance (instance);
2664 vinfo->slp_instances.ordered_remove (i);
2666 else
2668 /* Compute the costs of the SLP instance. */
2669 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2670 i++;
2674 return !vinfo->slp_instances.is_empty ();
2678 /* Compute the scalar cost of the SLP node NODE and its children
2679 and return it. Do not account defs that are marked in LIFE and
2680 update LIFE according to uses of NODE. */
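/* For example, if a def computed inside the SLP tree is also used by
   a scalar stmt outside the vectorized region, the def must stay live;
   marking it in LIFE keeps its cost (and that of the stmts feeding it)
   out of the scalar cost the vector code is credited with saving.  */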
2682 static unsigned
2683 vect_bb_slp_scalar_cost (basic_block bb,
2684 slp_tree node, vec<bool, va_heap> *life)
2686 unsigned scalar_cost = 0;
2687 unsigned i;
2688 gimple *stmt;
2689 slp_tree child;
2691 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2693 unsigned stmt_cost;
2694 ssa_op_iter op_iter;
2695 def_operand_p def_p;
2696 stmt_vec_info stmt_info;
2698 if ((*life)[i])
2699 continue;
2701 /* If there is a non-vectorized use of the defs then the scalar
2702 stmt is kept live in which case we do not account it or any
2703 required defs in the SLP children in the scalar cost. This
2704 way we make the vectorization more costly when compared to
2705 the scalar cost. */
2706 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2708 imm_use_iterator use_iter;
2709 gimple *use_stmt;
2710 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2711 if (!is_gimple_debug (use_stmt)
2712 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2713 use_stmt)
2714 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2716 (*life)[i] = true;
2717 BREAK_FROM_IMM_USE_STMT (use_iter);
2720 if ((*life)[i])
2721 continue;
2723 /* Count scalar stmts only once. */
2724 if (gimple_visited_p (stmt))
2725 continue;
2726 gimple_set_visited (stmt, true);
2728 stmt_info = vinfo_for_stmt (stmt);
2729 if (STMT_VINFO_DATA_REF (stmt_info))
2731 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2732 stmt_cost = vect_get_stmt_cost (scalar_load);
2733 else
2734 stmt_cost = vect_get_stmt_cost (scalar_store);
2736 else
2737 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2739 scalar_cost += stmt_cost;
2742 auto_vec<bool, 20> subtree_life;
2743 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2745 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2747 /* Do not directly pass LIFE to the recursive call, copy it to
2748 confine changes in the callee to the current child/subtree. */
2749 subtree_life.safe_splice (*life);
2750 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2751 subtree_life.truncate (0);
2755 return scalar_cost;
2758 /* Check if vectorization of the basic block is profitable. */
2760 static bool
2761 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2763 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2764 slp_instance instance;
2765 int i;
2766 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2767 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2769 /* Calculate scalar cost. */
2770 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2772 auto_vec<bool, 20> life;
2773 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2774 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2775 SLP_INSTANCE_TREE (instance),
2776 &life);
2779 /* Unset visited flag. */
2780 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2781 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2782 gimple_set_visited (gsi_stmt (gsi), false);
2784 /* Complete the target-specific cost calculation. */
2785 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2786 &vec_inside_cost, &vec_epilogue_cost);
2788 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2790 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2793 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2794 vec_inside_cost);
2795 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2796 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2797 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2800 /* Vectorization is profitable if its cost is no greater than the cost of
2801 the scalar version. Note that we err on the vector side for equal cost because
2802 the cost estimate is otherwise quite pessimistic (constant uses are
2803 free on the scalar side but cost a load on the vector side for
2804 example). */
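/* E.g. (hypothetical costs): a scalar cost of 10 against vector
   prologue + epilogue + inside costs of 2 + 1 + 7 == 10 still
   vectorizes, while 2 + 1 + 8 == 11 fails the check below.  */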
2805 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2806 return false;
2808 return true;
2811 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2812 if so and sets fatal to true if failure is independent of
2813 current_vector_size. */
2815 static bb_vec_info
2816 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2817 gimple_stmt_iterator region_end,
2818 vec<data_reference_p> datarefs, int n_stmts,
2819 bool &fatal)
2821 bb_vec_info bb_vinfo;
2822 slp_instance instance;
2823 int i;
2824 int min_vf = 2;
2826 /* The first group of checks is independent of the vector size. */
2827 fatal = true;
2829 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2831 if (dump_enabled_p ())
2832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2833 "not vectorized: too many instructions in "
2834 "basic block.\n");
2835 free_data_refs (datarefs);
2836 return NULL;
2839 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2840 if (!bb_vinfo)
2841 return NULL;
2843 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2845 /* Analyze the data references. */
2847 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2851 "not vectorized: unhandled data-ref in basic "
2852 "block.\n");
2854 delete bb_vinfo;
2855 return NULL;
2858 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2860 if (dump_enabled_p ())
2861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2862 "not vectorized: not enough data-refs in "
2863 "basic block.\n");
2865 delete bb_vinfo;
2866 return NULL;
2869 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2871 if (dump_enabled_p ())
2872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2873 "not vectorized: unhandled data access in "
2874 "basic block.\n");
2876 delete bb_vinfo;
2877 return NULL;
2880 /* If there are no grouped stores in the region there is no need
2881 to continue with pattern recog as vect_analyze_slp will fail
2882 anyway. */
2883 if (bb_vinfo->grouped_stores.is_empty ())
2885 if (dump_enabled_p ())
2886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2887 "not vectorized: no grouped stores in "
2888 "basic block.\n");
2890 delete bb_vinfo;
2891 return NULL;
2894 /* The rest of the analysis below depends on the vector size in some way. */
2895 fatal = false;
2897 vect_pattern_recog (bb_vinfo);
2899 /* Check the SLP opportunities in the basic block, analyze and build SLP
2900 trees. */
2901 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2903 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2906 "Failed to SLP the basic block.\n");
2907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2908 "not vectorized: failed to find SLP opportunities "
2909 "in basic block.\n");
2912 delete bb_vinfo;
2913 return NULL;
2916 vect_record_base_alignments (bb_vinfo);
2918 /* Analyze and verify the alignment of data references and the
2919 dependence in the SLP instances. */
2920 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2922 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2923 || ! vect_slp_analyze_instance_dependence (instance))
2925 dump_printf_loc (MSG_NOTE, vect_location,
2926 "removing SLP instance operations starting from: ");
2927 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2928 SLP_TREE_SCALAR_STMTS
2929 (SLP_INSTANCE_TREE (instance))[0], 0);
2930 vect_free_slp_instance (instance);
2931 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2932 continue;
2935 /* Mark all the statements that we want to vectorize as pure SLP and
2936 relevant. */
2937 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2938 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2940 i++;
2942 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2944 delete bb_vinfo;
2945 return NULL;
2948 if (!vect_slp_analyze_operations (bb_vinfo))
2950 if (dump_enabled_p ())
2951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2952 "not vectorized: bad operation in basic block.\n");
2954 delete bb_vinfo;
2955 return NULL;
2958 /* Cost model: check if the vectorization is worthwhile. */
2959 if (!unlimited_cost_model (NULL)
2960 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2962 if (dump_enabled_p ())
2963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2964 "not vectorized: vectorization is not "
2965 "profitable.\n");
2967 delete bb_vinfo;
2968 return NULL;
2971 if (dump_enabled_p ())
2972 dump_printf_loc (MSG_NOTE, vect_location,
2973 "Basic block will be vectorized using SLP\n");
2975 return bb_vinfo;
2979 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2980 true if anything in the basic-block was vectorized. */
2982 bool
2983 vect_slp_bb (basic_block bb)
2985 bb_vec_info bb_vinfo;
2986 gimple_stmt_iterator gsi;
2987 unsigned int vector_sizes;
2988 bool any_vectorized = false;
2990 if (dump_enabled_p ())
2991 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2993 /* Autodetect first vector size we try. */
2994 current_vector_size = 0;
2995 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2997 gsi = gsi_start_bb (bb);
2999 while (1)
3001 if (gsi_end_p (gsi))
3002 break;
3004 gimple_stmt_iterator region_begin = gsi;
3005 vec<data_reference_p> datarefs = vNULL;
3006 int insns = 0;
3008 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3010 gimple *stmt = gsi_stmt (gsi);
3011 if (is_gimple_debug (stmt))
3012 continue;
3013 insns++;
3015 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3016 vect_location = gimple_location (stmt);
3018 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3019 break;
3022 /* Skip leading unhandled stmts. */
3023 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3025 gsi_next (&gsi);
3026 continue;
3029 gimple_stmt_iterator region_end = gsi;
3031 bool vectorized = false;
3032 bool fatal = false;
3033 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3034 datarefs, insns, fatal);
3035 if (bb_vinfo
3036 && dbg_cnt (vect_slp))
3038 if (dump_enabled_p ())
3039 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3041 vect_schedule_slp (bb_vinfo);
3043 if (dump_enabled_p ())
3044 dump_printf_loc (MSG_NOTE, vect_location,
3045 "basic block part vectorized\n");
3047 vectorized = true;
3049 delete bb_vinfo;
3051 any_vectorized |= vectorized;
3053 vector_sizes &= ~current_vector_size;
3054 if (vectorized
3055 || vector_sizes == 0
3056 || current_vector_size == 0
3057 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3058 vector sizes will fail do not bother iterating. */
3059 || fatal)
3061 if (gsi_end_p (region_end))
3062 break;
3064 /* Skip the unhandled stmt. */
3065 gsi_next (&gsi);
3067 /* And reset vector sizes. */
3068 current_vector_size = 0;
3069 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3071 else
3073 /* Try the next biggest vector size. */
3074 current_vector_size = 1 << floor_log2 (vector_sizes);
3075 if (dump_enabled_p ())
3076 dump_printf_loc (MSG_NOTE, vect_location,
3077 "***** Re-trying analysis with "
3078 "vector size %d\n", current_vector_size);
3080 /* Start over. */
3081 gsi = region_begin;
3085 return any_vectorized;
3089 /* Return true if the vector type of the boolean constant that is the OPNUM
3090 operand in statement STMT is a boolean vector. */
3092 static bool
3093 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3095 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3096 enum tree_code code = gimple_expr_code (stmt);
3097 tree op, vectype;
3098 gimple *def_stmt;
3099 enum vect_def_type dt;
3101 /* For comparisons and COND_EXPR the type is chosen depending
3102 on the other comparison operand. */
3103 if (TREE_CODE_CLASS (code) == tcc_comparison)
3105 if (opnum)
3106 op = gimple_assign_rhs1 (stmt);
3107 else
3108 op = gimple_assign_rhs2 (stmt);
3110 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3111 &dt, &vectype))
3112 gcc_unreachable ();
3114 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3117 if (code == COND_EXPR)
3119 tree cond = gimple_assign_rhs1 (stmt);
3121 if (TREE_CODE (cond) == SSA_NAME)
3122 op = cond;
3123 else if (opnum)
3124 op = TREE_OPERAND (cond, 1);
3125 else
3126 op = TREE_OPERAND (cond, 0);
3128 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3129 &dt, &vectype))
3130 gcc_unreachable ();
3132 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3135 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3139 /* For constant and loop invariant defs of SLP_NODE this function returns
3140 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3141 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3142 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3146 static void
3147 vect_get_constant_vectors (tree op, slp_tree slp_node,
3148 vec<tree> *vec_oprnds,
3149 unsigned int op_num, unsigned int number_of_vectors)
3151 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3152 gimple *stmt = stmts[0];
3153 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3154 unsigned nunits;
3155 tree vec_cst;
3156 unsigned j, number_of_places_left_in_vector;
3157 tree vector_type;
3158 tree vop;
3159 int group_size = stmts.length ();
3160 unsigned int vec_num, i;
3161 unsigned number_of_copies = 1;
3162 vec<tree> voprnds;
3163 voprnds.create (number_of_vectors);
3164 bool constant_p, is_store;
3165 tree neutral_op = NULL;
3166 enum tree_code code = gimple_expr_code (stmt);
3167 gimple_seq ctor_seq = NULL;
3169 /* Check if vector type is a boolean vector. */
3170 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3171 && vect_mask_constant_operand_p (stmt, op_num))
3172 vector_type
3173 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3174 else
3175 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3176 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3178 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3180 is_store = true;
3181 op = gimple_assign_rhs1 (stmt);
3183 else
3184 is_store = false;
3186 gcc_assert (op);
3188 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3189 created vectors. It is greater than 1 if unrolling is performed.
3191 For example, we have two scalar operands, s1 and s2 (e.g., group of
3192 strided accesses of size two), while NUNITS is four (i.e., four scalars
3193 of this type can be packed in a vector). The output vector will contain
3194 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3195 will be 2).
3197 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3198 containing the operands.
3200 For example, NUNITS is four as before, and the group size is 8
3201 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3202 {s5, s6, s7, s8}. */
3204 number_of_copies = nunits * number_of_vectors / group_size;
3206 number_of_places_left_in_vector = nunits;
3207 constant_p = true;
3208 auto_vec<tree, 32> elts (nunits);
3209 elts.quick_grow (nunits);
3210 bool place_after_defs = false;
3211 for (j = 0; j < number_of_copies; j++)
3213 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3215 if (is_store)
3216 op = gimple_assign_rhs1 (stmt);
3217 else
3219 switch (code)
3221 case COND_EXPR:
3223 tree cond = gimple_assign_rhs1 (stmt);
3224 if (TREE_CODE (cond) == SSA_NAME)
3225 op = gimple_op (stmt, op_num + 1);
3226 else if (op_num == 0 || op_num == 1)
3227 op = TREE_OPERAND (cond, op_num);
3228 else
3230 if (op_num == 2)
3231 op = gimple_assign_rhs2 (stmt);
3232 else
3233 op = gimple_assign_rhs3 (stmt);
3236 break;
3238 case CALL_EXPR:
3239 op = gimple_call_arg (stmt, op_num);
3240 break;
3242 case LSHIFT_EXPR:
3243 case RSHIFT_EXPR:
3244 case LROTATE_EXPR:
3245 case RROTATE_EXPR:
3246 op = gimple_op (stmt, op_num + 1);
3247 /* Unlike the other binary operators, shifts/rotates have
3248 the shift count being int, instead of the same type as
3249 the lhs, so make sure the scalar is the right type if
3250 we are dealing with vectors of
3251 long long/long/short/char. */
3252 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3253 op = fold_convert (TREE_TYPE (vector_type), op);
3254 break;
3256 default:
3257 op = gimple_op (stmt, op_num + 1);
3258 break;
3262 /* Create 'vect_ = {op0,op1,...,opn}'. */
3263 number_of_places_left_in_vector--;
3264 tree orig_op = op;
3265 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3267 if (CONSTANT_CLASS_P (op))
3269 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3271 /* Can't use VIEW_CONVERT_EXPR for booleans because
3272 of possibly different sizes of scalar value and
3273 vector element. */
3274 if (integer_zerop (op))
3275 op = build_int_cst (TREE_TYPE (vector_type), 0);
3276 else if (integer_onep (op))
3277 op = build_all_ones_cst (TREE_TYPE (vector_type));
3278 else
3279 gcc_unreachable ();
3281 else
3282 op = fold_unary (VIEW_CONVERT_EXPR,
3283 TREE_TYPE (vector_type), op);
3284 gcc_assert (op && CONSTANT_CLASS_P (op));
3286 else
3288 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3289 gimple *init_stmt;
3290 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3292 tree true_val
3293 = build_all_ones_cst (TREE_TYPE (vector_type));
3294 tree false_val
3295 = build_zero_cst (TREE_TYPE (vector_type));
3296 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3297 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3298 op, true_val,
3299 false_val);
3301 else
3303 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3304 op);
3305 init_stmt
3306 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3307 op);
3309 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3310 op = new_temp;
3313 elts[number_of_places_left_in_vector] = op;
3314 if (!CONSTANT_CLASS_P (op))
3315 constant_p = false;
3316 if (TREE_CODE (orig_op) == SSA_NAME
3317 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3318 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3319 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3320 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3321 place_after_defs = true;
3323 if (number_of_places_left_in_vector == 0)
3325 if (constant_p)
3326 vec_cst = build_vector (vector_type, elts);
3327 else
3329 vec<constructor_elt, va_gc> *v;
3330 unsigned k;
3331 vec_alloc (v, nunits);
3332 for (k = 0; k < nunits; ++k)
3333 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3334 vec_cst = build_constructor (vector_type, v);
3336 tree init;
3337 gimple_stmt_iterator gsi;
3338 if (place_after_defs)
3340 gsi = gsi_for_stmt
3341 (vect_find_last_scalar_stmt_in_slp (slp_node));
3342 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3344 else
3345 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3346 if (ctor_seq != NULL)
3348 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3349 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3350 GSI_SAME_STMT);
3351 ctor_seq = NULL;
3353 voprnds.quick_push (init);
3354 place_after_defs = false;
3355 number_of_places_left_in_vector = nunits;
3356 constant_p = true;
3361 /* Since the vectors were created in reverse order, copy them to
3362 VEC_OPRNDS back-to-front to restore the original order. */
3363 vec_num = voprnds.length ();
3364 for (j = vec_num; j != 0; j--)
3366 vop = voprnds[j - 1];
3367 vec_oprnds->quick_push (vop);
3370 voprnds.release ();
3372 /* If VF is greater than the unrolling factor needed for the SLP
3373 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3374 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3375 to replicate the vectors. */
3376 while (number_of_vectors > vec_oprnds->length ())
3378 tree neutral_vec = NULL;
3380 if (neutral_op)
3382 if (!neutral_vec)
3383 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3385 vec_oprnds->quick_push (neutral_vec);
3387 else
3389 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3390 vec_oprnds->quick_push (vop);
3396 /* Get vectorized definitions from SLP_NODE that contains corresponding
3397 vectorized def-stmts. */
3399 static void
3400 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3402 tree vec_oprnd;
3403 gimple *vec_def_stmt;
3404 unsigned int i;
3406 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3408 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3410 gcc_assert (vec_def_stmt);
3411 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3412 vec_oprnd = gimple_phi_result (vec_def_stmt);
3413 else
3414 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3415 vec_oprnds->quick_push (vec_oprnd);
3420 /* Get vectorized definitions for SLP_NODE.
3421 If the scalar definitions are loop invariants or constants, collect them and
3422 call vect_get_constant_vectors() to create vector stmts.
3423 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3424 must be stored in the corresponding child of SLP_NODE, and we call
3425 vect_get_slp_vect_defs () to retrieve them. */
3427 void
3428 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3429 vec<vec<tree> > *vec_oprnds)
3431 gimple *first_stmt;
3432 int number_of_vects = 0, i;
3433 unsigned int child_index = 0;
3434 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3435 slp_tree child = NULL;
3436 vec<tree> vec_defs;
3437 tree oprnd;
3438 bool vectorized_defs;
3440 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3441 FOR_EACH_VEC_ELT (ops, i, oprnd)
3443 /* For each operand we check if it has vectorized definitions in a child
3444 node or we need to create them (for invariants and constants). We
3445 check if the LHS of the first stmt of the next child matches OPRND.
3446 If it does, we found the correct child. Otherwise, we call
3447 vect_get_constant_vectors (), and do not advance CHILD_INDEX in order
3448 to check this child node for the next operand. */
3449 vectorized_defs = false;
3450 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3452 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3454 /* We have to check both pattern and original def, if available. */
3455 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3457 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3458 gimple *related
3459 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3460 tree first_def_op;
3462 if (gimple_code (first_def) == GIMPLE_PHI)
3463 first_def_op = gimple_phi_result (first_def);
3464 else
3465 first_def_op = gimple_get_lhs (first_def);
3466 if (operand_equal_p (oprnd, first_def_op, 0)
3467 || (related
3468 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3470 /* The number of vector defs is determined by the number of
3471 vector statements in the node from which we get those
3472 statements. */
3473 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3474 vectorized_defs = true;
3475 child_index++;
3478 else
3479 child_index++;
3482 if (!vectorized_defs)
3484 if (i == 0)
3486 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3487 /* Number of vector stmts was calculated according to LHS in
3488 vect_schedule_slp_instance (), fix it by replacing LHS with
3489 RHS, if necessary. See vect_get_smallest_scalar_type () for
3490 details. */
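/* E.g. (made-up types): for stmts narrowing int to char,
   LHS_SIZE_UNIT == 1 and RHS_SIZE_UNIT == 4, so four times as many
   vector defs are needed for the operands as for the results.  */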
3491 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3492 &rhs_size_unit);
3493 if (rhs_size_unit != lhs_size_unit)
3495 number_of_vects *= rhs_size_unit;
3496 number_of_vects /= lhs_size_unit;
3501 /* Allocate memory for vectorized defs. */
3502 vec_defs = vNULL;
3503 vec_defs.create (number_of_vects);
3505 /* For reduction defs we call vect_get_constant_vectors (), since we are
3506 looking for initial loop invariant values. */
3507 if (vectorized_defs)
3508 /* The defs are already vectorized. */
3509 vect_get_slp_vect_defs (child, &vec_defs);
3510 else
3511 /* Build vectors from scalar defs. */
3512 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3513 number_of_vects);
3515 vec_oprnds->quick_push (vec_defs);
3519 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3520 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3521 permute statements for the SLP node NODE of the SLP instance
3522 SLP_NODE_INSTANCE. */
3524 bool
3525 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3526 gimple_stmt_iterator *gsi, int vf,
3527 slp_instance slp_node_instance, bool analyze_only,
3528 unsigned *n_perms)
3530 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3532 tree mask_element_type = NULL_TREE, mask_type;
3533 int nunits, vec_index = 0;
3534 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3535 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3536 int mask_element;
3537 machine_mode mode;
3539 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3540 return false;
3542 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3544 mode = TYPE_MODE (vectype);
3546 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3547 same size as the vector element being permuted. */
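/* E.g. for a V4SF vector the mask elements are 32-bit integers and
   MASK_TYPE below is a four-element 32-bit integer vector type.  */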
3548 mask_element_type = lang_hooks.types.type_for_mode
3549 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3550 mask_type = get_vectype_for_scalar_type (mask_element_type);
3551 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3552 auto_vec_perm_indices mask (nunits);
3553 mask.quick_grow (nunits);
3555 /* Initialize the vect stmts of NODE to properly insert the generated
3556 stmts later. */
3557 if (! analyze_only)
3558 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3559 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3560 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3562 /* Generate permutation masks for every NODE. Number of masks for each NODE
3563 is equal to GROUP_SIZE.
3564 E.g., we have a group of three nodes with three loads from the same
3565 location in each node, and the vector size is 4. I.e., we have a
3566 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3567 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3568 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3571 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3572 The last mask is illegal since we assume two operands for permute
3573 operation, and the mask element values can't be outside that range.
3574 Hence, the last mask must be converted into {2,5,5,5}.
3575 For the first two permutations we need the first and the second input
3576 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3577 we need the second and the third vectors: {b1,c1,a2,b2} and
3578 {c2,a3,b3,c3}. */
3580 int vect_stmts_counter = 0;
3581 int index = 0;
3582 int first_vec_index = -1;
3583 int second_vec_index = -1;
3584 bool noop_p = true;
3585 *n_perms = 0;
3587 for (int j = 0; j < vf; j++)
3589 for (int k = 0; k < group_size; k++)
3591 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3592 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3593 vec_index = i / nunits;
3594 mask_element = i % nunits;
3595 if (vec_index == first_vec_index
3596 || first_vec_index == -1)
3598 first_vec_index = vec_index;
3600 else if (vec_index == second_vec_index
3601 || second_vec_index == -1)
3603 second_vec_index = vec_index;
3604 mask_element += nunits;
3606 else
3608 if (dump_enabled_p ())
3610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3611 "permutation requires at "
3612 "least three vectors ");
3613 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3614 stmt, 0);
3616 return false;
3619 gcc_assert (mask_element >= 0
3620 && mask_element < 2 * nunits);
3621 if (mask_element != index)
3622 noop_p = false;
3623 mask[index++] = mask_element;
3625 if (index == nunits)
3627 if (! noop_p
3628 && ! can_vec_perm_p (mode, false, &mask))
3630 if (dump_enabled_p ())
3632 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3633 vect_location,
3634 "unsupported vect permute { ");
3635 for (i = 0; i < nunits; ++i)
3636 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3637 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3639 return false;
3642 if (! noop_p)
3643 ++*n_perms;
3645 if (!analyze_only)
3647 tree mask_vec = NULL_TREE;
3649 if (! noop_p)
3651 auto_vec<tree, 32> mask_elts (nunits);
3652 for (int l = 0; l < nunits; ++l)
3653 mask_elts.quick_push (build_int_cst (mask_element_type,
3654 mask[l]));
3655 mask_vec = build_vector (mask_type, mask_elts);
3658 if (second_vec_index == -1)
3659 second_vec_index = first_vec_index;
3661 /* Generate the permute statement if necessary. */
3662 tree first_vec = dr_chain[first_vec_index];
3663 tree second_vec = dr_chain[second_vec_index];
3664 gimple *perm_stmt;
3665 if (! noop_p)
3667 tree perm_dest
3668 = vect_create_destination_var (gimple_assign_lhs (stmt),
3669 vectype);
3670 perm_dest = make_ssa_name (perm_dest);
3671 perm_stmt = gimple_build_assign (perm_dest,
3672 VEC_PERM_EXPR,
3673 first_vec, second_vec,
3674 mask_vec);
3675 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3677 else
3678 /* If mask was NULL_TREE generate the requested
3679 identity transform. */
3680 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3682 /* Store the vector statement in NODE. */
3683 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3686 index = 0;
3687 first_vec_index = -1;
3688 second_vec_index = -1;
3689 noop_p = true;
3694 return true;
3699 /* Vectorize SLP instance tree in postorder. */
3701 static bool
3702 vect_schedule_slp_instance (slp_tree node, slp_instance instance)
3704 gimple *stmt;
3705 bool grouped_store, is_store;
3706 gimple_stmt_iterator si;
3707 stmt_vec_info stmt_info;
3708 unsigned int group_size;
3709 tree vectype;
3710 int i, j;
3711 slp_tree child;
3713 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3714 return false;
3716 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3717 vect_schedule_slp_instance (child, instance);
3719 /* Push SLP node def-type to stmts. */
3720 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3721 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3722 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3723 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3725 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3726 stmt_info = vinfo_for_stmt (stmt);
3728 /* VECTYPE is the type of the destination. */
3729 vectype = STMT_VINFO_VECTYPE (stmt_info);
3730 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3732 if (!SLP_TREE_VEC_STMTS (node).exists ())
3733 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3735 if (dump_enabled_p ())
3737 dump_printf_loc (MSG_NOTE,vect_location,
3738 "------>vectorizing SLP node starting from: ");
3739 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3742 /* Vectorized stmts go before the last scalar stmt which is where
3743 all uses are ready. */
3744 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3746 /* Mark the first element of the reduction chain as reduction to properly
3747 transform the node. In the analysis phase only the last element of the
3748 chain is marked as reduction. */
3749 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3750 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3752 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3753 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3756 /* Handle two-operation SLP nodes by vectorizing the group with
3757 both operations and then performing a merge. */
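/* As a sketch with a made-up group: for { x0 + y0, x1 - y1 } the node
   is vectorized once as PLUS and once as MINUS, and the two results
   are blended with a VEC_PERM_EXPR whose mask selects lanes
   { 0, nunits + 1, 2, nunits + 3, ... } - the kind of blend a vector
   addsub instruction implements.  */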
3758 if (SLP_TREE_TWO_OPERATORS (node))
3760 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3761 enum tree_code ocode = ERROR_MARK;
3762 gimple *ostmt;
3763 auto_vec_perm_indices mask (group_size);
3764 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3765 if (gimple_assign_rhs_code (ostmt) != code0)
3767 mask.quick_push (1);
3768 ocode = gimple_assign_rhs_code (ostmt);
3770 else
3771 mask.quick_push (0);
3772 if (ocode != ERROR_MARK)
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
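	  /* Build the merge mask: lane L of output vector J is taken
	     from v0[J] (element L) where the corresponding scalar stmt
	     uses CODE0 and from v1[J] (element nunits + L) where it
	     uses OCODE, cycling through MASK as lanes cross group
	     boundaries.  */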
	  for (j = 0; j < v0.length (); ++j)
	    {
	      unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
	      auto_vec<tree, 32> melts (nunits);
	      for (l = 0; l < nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
		  melts.quick_push (t);
		}
	      tmask = build_vector (mvectype, melts);
	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets'
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE by setting their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */
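/* E.g. (illustrative) a call x_1 = sqrtf (y_2) whose result has been
   fully vectorized is rewritten as x_1 = 0.0f; the dead scalar
   assignment keeps the IL and its stmt_vec_info valid and is cleaned
   up later.  */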

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);
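      /* Remove the now-vectorized scalar stores of the group below;
	 unlike plain dead scalar code they have virtual definitions
	 and thus are not removed by DCE, so they are deleted
	 explicitly here.  */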
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);
	}
    }

  return is_store;
}