/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
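
/* Editorial illustration (not in the original source): for a group of
   four scalar additions a[i] = b[i] + c[i], two slp_oprnd_info entries
   are created; oprnds_info[0]->def_stmts collects the defining stmts of
   b[0]..b[3] and oprnds_info[1]->def_stmts those of c[0]..c[3].  */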

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
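
/* Editorial example (not in the original source): for a chain with
   elements a[0], a[2], a[3], GROUP_GAP is 2 for a[2] and 1 for a[3],
   so querying the place of a[3] accumulates 2 + 1 and returns 3 even
   though a[3] is only the third statement of the chain.  */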

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates that
   a swap is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT
   is a cond and the operands of the comparison need to be swapped; *SWAP is
   2 if STMT is a cond and the code of the comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to a non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the father node of this one, return 1; if everything
   is ok return 0.  */
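
/* Editorial example (not in the original source): in a group whose first
   stmt is a0 = x + 5 (operand 0 internal def, operand 1 constant), a later
   stmt a1 = 5 + y matches only after swapping its two operands, which is
   legal because PLUS_EXPR is commutative; the swap is then reported back
   through *SWAP.  */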

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
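	  /* Editorial note: row 0 keeps the source order, row 1
	     (*SWAP == 1) exchanges the two operands of the embedded
	     comparison, and row 2 (*SWAP == 2) exchanges the two arms
	     of the COND_EXPR.  */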
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}
      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, unsigned int *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

  return true;
}

/* Verify if the scalar stmts STMTS are isomorphic to each other, do not
   require any data permutation, and are of supported types of operation.
   Return true if so, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Non-grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
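  /* Editorial example: for the two-operator group {a + b, a - b} with a
     4-lane vector type, a PLUS and a MINUS vector are both computed and
     the selector built below is {0, 5, 2, 7}: lane I selects from the
     first vector and lane I + COUNT from the second, so the odd lanes
     take the MINUS result.  */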
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
      for (i = 0; i < count; ++i)
	{
	  unsigned int elt = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    elt += count;
	  sel.quick_push (elt);
	}
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if the def-stmts are not isomorphic, require
   data permutation, or are of unsupported types of operation.
   Otherwise, return the built SLP tree.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned int *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commuted try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of not successful permutes was not
	     more than a cut-off, as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
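
/* Editorial example: if every load node of a two-element reduction
   instance carries the load permutation {1, 0}, the scalar stmts of all
   nodes can be reordered by that permutation, after which the loads are
   in interleaving-chain order and need no runtime permutation.  */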

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (unrolling_factor == 1
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
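      /* Editorial example: for an interleaving chain a[0], a[1], a[2],
	 a[3], a node that loads exactly a[2], a[3] forms a subchain
	 (consecutive elements, gap 1), so its load permutation can
	 simply be dropped.  */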
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  unsigned test_vf
    = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			       (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}

/* Find the last scalar stmt in NODE, using the position of the original
   stmt for pattern stmts.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost,
			 scalar_stmts_set_t* visited)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* If we already costed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
    return;

  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost, visited);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
		 redundancies as well as loads that are later dead.  */
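	      /* Editorial example: with GROUP_SIZE 8, NUNITS 4 and a
		 permutation using only indices 0..3, the loop below
		 counts a single vector load instead of two.  */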
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + nunits - 1) / nunits);
	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
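  /* Editorial example: GROUP_SIZE 6 with 4-element vectors yields
     least_common_multiple (4, 6) / 4 = 3 vector stmts, which cover two
     unrolled copies of the six scalar stmts.  */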
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
					   (TREE_TYPE (gimple_assign_rhs1 (stmt))));
	  break;
	default:;
	}
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost, visited);
  delete visited;

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
1912 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1913 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1914 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1915 containing the remainder.
1916 Return the first stmt in the second group. */
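/* A worked example with illustrative sizes: splitting a group of 8
consecutive stores with GROUP1_SIZE == 3 leaves {s0,s1,s2} as the first
group and makes s3 the head of a new group {s3,...,s7}; the GROUP_GAP
updates below make each group skip over the elements of the other.  */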
1918 static gimple *
1919 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1921 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1922 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1923 gcc_assert (group1_size > 0);
1924 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1925 gcc_assert (group2_size > 0);
1926 GROUP_SIZE (first_vinfo) = group1_size;
1928 gimple *stmt = first_stmt;
1929 for (unsigned i = group1_size; i > 1; i--)
1931 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1932 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1934 /* STMT is now the last element of the first group. */
1935 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1936 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1938 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1939 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1941 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1942 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1945 /* For the second group, the GROUP_GAP is that before the original group,
1946 plus the size of the first group, which the second group now skips over. */
1947 GROUP_GAP (vinfo_for_stmt (group2)) =
1948 GROUP_GAP (first_vinfo) + group1_size;
1950 /* GROUP_GAP of the first group now has to skip over the second group too. */
1951 GROUP_GAP (first_vinfo) += group2_size;
1953 if (dump_enabled_p ())
1954 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1955 group1_size, group2_size);
1957 return group2;
1960 /* Analyze an SLP instance starting from a group of grouped stores. Call
1961 vect_build_slp_tree to build a tree of packed stmts if possible.
1962 Return FALSE if it's impossible to SLP any stmt in the loop. */
1964 static bool
1965 vect_analyze_slp_instance (vec_info *vinfo,
1966 gimple *stmt, unsigned max_tree_size)
1968 slp_instance new_instance;
1969 slp_tree node;
1970 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1971 unsigned int unrolling_factor = 1, nunits;
1972 tree vectype, scalar_type = NULL_TREE;
1973 gimple *next;
1974 unsigned int i;
1975 unsigned int max_nunits = 0;
1976 vec<slp_tree> loads;
1977 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1978 vec<gimple *> scalar_stmts;
1980 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1982 if (dr)
1984 scalar_type = TREE_TYPE (DR_REF (dr));
1985 vectype = get_vectype_for_scalar_type (scalar_type);
1987 else
1989 gcc_assert (is_a <loop_vec_info> (vinfo));
1990 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1993 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1995 else
1997 gcc_assert (is_a <loop_vec_info> (vinfo));
1998 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1999 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2002 if (!vectype)
2004 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2007 "Build SLP failed: unsupported data-type ");
2008 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
2009 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2012 return false;
2014 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2016 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2017 scalar_stmts.create (group_size);
2018 next = stmt;
2019 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2021 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2022 while (next)
2024 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2025 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2026 scalar_stmts.safe_push (
2027 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2028 else
2029 scalar_stmts.safe_push (next);
2030 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2032 /* Mark the first element of the reduction chain as reduction to properly
2033 transform the node. In the reduction analysis phase only the last
2034 element of the chain is marked as reduction. */
2035 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2036 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2038 else
2040 /* Collect reduction statements. */
2041 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2042 for (i = 0; reductions.iterate (i, &next); i++)
2043 scalar_stmts.safe_push (next);
2046 loads.create (group_size);
2048 /* Build the tree for the SLP instance. */
2049 bool *matches = XALLOCAVEC (bool, group_size);
2050 unsigned npermutes = 0;
2051 bst_fail = new scalar_stmts_set_t ();
2052 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2053 &max_nunits, &loads, matches, &npermutes,
2054 NULL, max_tree_size);
2055 delete bst_fail;
2056 if (node != NULL)
2058 /* Calculate the unrolling factor based on the smallest type. */
2059 unrolling_factor
2060 = least_common_multiple (max_nunits, group_size) / group_size;
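/* E.g., a group of 2 stmts with a maximal vector width of 4 lanes gives
least_common_multiple (4, 2) / 2 == 2, i.e. the group has to be unrolled
twice to fill a whole vector.  */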
2062 if (unrolling_factor != 1
2063 && is_a <bb_vec_info> (vinfo))
2066 if (max_nunits > group_size)
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "Build SLP failed: store group "
2070 "size not a multiple of the vector size "
2071 "in basic block SLP\n");
2072 vect_free_slp_tree (node);
2073 loads.release ();
2074 return false;
2076 /* Fatal mismatch. */
2077 matches[group_size/max_nunits * max_nunits] = false;
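/* E.g., with group_size == 6 and max_nunits == 4 this marks matches[4],
the first element of the trailing partial vector, so the group splitting
below stops at the last full-vector boundary.  */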
2078 vect_free_slp_tree (node);
2079 loads.release ();
2081 else
2083 /* Create a new SLP instance. */
2084 new_instance = XNEW (struct _slp_instance);
2085 SLP_INSTANCE_TREE (new_instance) = node;
2086 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2087 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2088 SLP_INSTANCE_LOADS (new_instance) = loads;
2090 /* Compute the load permutation. */
2091 slp_tree load_node;
2092 bool loads_permuted = false;
2093 FOR_EACH_VEC_ELT (loads, i, load_node)
2095 vec<unsigned> load_permutation;
2096 int j;
2097 gimple *load, *first_stmt;
2098 bool this_load_permuted = false;
2099 load_permutation.create (group_size);
2100 first_stmt = GROUP_FIRST_ELEMENT
2101 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2102 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2104 int load_place = vect_get_place_in_interleaving_chain
2105 (load, first_stmt);
2106 gcc_assert (load_place != -1);
2107 if (load_place != j)
2108 this_load_permuted = true;
2109 load_permutation.safe_push (load_place);
2111 if (!this_load_permuted
2112 /* The load requires permutation when unrolling exposes
2113 a gap either because the group is larger than the SLP
2114 group-size or because there is a gap between the groups. */
2115 && (unrolling_factor == 1
2116 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2117 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2119 load_permutation.release ();
2120 continue;
2122 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2123 loads_permuted = true;
2126 if (loads_permuted)
2128 if (!vect_supported_load_permutation_p (new_instance))
2130 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "Build SLP failed: unsupported load "
2134 "permutation ");
2135 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2136 TDF_SLIM, stmt, 0);
2138 vect_free_slp_instance (new_instance);
2139 return false;
2143 /* If the loads and stores can be handled with load/store-lanes
2144 instructions, do not generate this SLP instance. */
2145 if (is_a <loop_vec_info> (vinfo)
2146 && loads_permuted
2147 && dr && vect_store_lanes_supported (vectype, group_size))
2149 slp_tree load_node;
2150 FOR_EACH_VEC_ELT (loads, i, load_node)
2152 gimple *first_stmt = GROUP_FIRST_ELEMENT
2153 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2154 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2155 /* Use SLP for strided accesses (or if we
2156 can't load-lanes). */
2157 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2158 || ! vect_load_lanes_supported
2159 (STMT_VINFO_VECTYPE (stmt_vinfo),
2160 GROUP_SIZE (stmt_vinfo)))
2161 break;
2163 if (i == loads.length ())
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2167 "Built SLP cancelled: can use "
2168 "load/store-lanes\n");
2169 vect_free_slp_instance (new_instance);
2170 return false;
2174 vinfo->slp_instances.safe_push (new_instance);
2176 if (dump_enabled_p ())
2178 dump_printf_loc (MSG_NOTE, vect_location,
2179 "Final SLP tree for instance:\n");
2180 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2183 return true;
2186 else
2188 /* Failed to SLP. */
2189 /* Free the allocated memory. */
2190 scalar_stmts.release ();
2191 loads.release ();
2194 /* For basic block SLP, try to break the group up into multiples of the
2195 vector size. */
2196 if (is_a <bb_vec_info> (vinfo)
2197 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2198 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2200 /* We consider breaking the group only on VF boundaries from the existing
2201 start. */
2202 for (i = 0; i < group_size; i++)
2203 if (!matches[i]) break;
2205 if (i >= nunits && i < group_size)
2207 /* Split into two groups at the first vector boundary before i. */
2208 gcc_assert ((nunits & (nunits - 1)) == 0);
2209 unsigned group1_size = i & ~(nunits - 1);
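/* E.g., with nunits == 4 and the first mismatch at i == 5, group1_size
becomes 5 & ~3 == 4: the split point is rounded down to the last vector
boundary before the mismatch.  This relies on nunits being a power of
two, which is asserted above.  */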
2211 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2212 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2213 /* If the first non-match was in the middle of a vector,
2214 skip the rest of that vector. */
2215 if (group1_size < i)
2217 i = group1_size + nunits;
2218 if (i < group_size)
2219 rest = vect_split_slp_store_group (rest, nunits);
2221 if (i < group_size)
2222 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2223 return res;
2225 /* Even though the first vector did not all match, we might be able to SLP
2226 (some) of the remainder. FORNOW ignore this possibility. */
2229 return false;
2233 /* Check if there are stmts in the loop that can be vectorized using SLP.
2234 Build SLP trees of packed scalar stmts if SLP is possible. */
2236 bool
2237 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2239 unsigned int i;
2240 gimple *first_element;
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2245 /* Find SLP sequences starting from groups of grouped stores. */
2246 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2247 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2249 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2251 if (loop_vinfo->reduction_chains.length () > 0)
2253 /* Find SLP sequences starting from reduction chains. */
2254 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2255 if (! vect_analyze_slp_instance (vinfo, first_element,
2256 max_tree_size))
2258 /* Dissolve reduction chain group. */
2259 gimple *next, *stmt = first_element;
2260 while (stmt)
2262 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2263 next = GROUP_NEXT_ELEMENT (vinfo);
2264 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2265 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2266 stmt = next;
2268 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2269 = vect_internal_def;
2273 /* Find SLP sequences starting from groups of reductions. */
2274 if (loop_vinfo->reductions.length () > 1)
2275 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2276 max_tree_size);
2279 return true;
2283 /* For each possible SLP instance decide whether to SLP it and calculate
2284 the overall unrolling factor needed to SLP the loop. Return TRUE if we
2285 decided to SLP at least one instance. */
2287 bool
2288 vect_make_slp_decision (loop_vec_info loop_vinfo)
2290 unsigned int i, unrolling_factor = 1;
2291 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2292 slp_instance instance;
2293 int decided_to_slp = 0;
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2297 "\n");
2299 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2301 /* FORNOW: SLP if you can. */
2302 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2303 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2305 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2306 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2307 loop-based vectorization. Such stmts will be marked as HYBRID. */
2308 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2309 decided_to_slp++;
2312 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2314 if (decided_to_slp && dump_enabled_p ())
2315 dump_printf_loc (MSG_NOTE, vect_location,
2316 "Decided to SLP %d instances. Unrolling factor %d\n",
2317 decided_to_slp, unrolling_factor);
2319 return (decided_to_slp > 0);
2323 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2324 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2326 static void
2327 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2329 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2330 imm_use_iterator imm_iter;
2331 gimple *use_stmt;
2332 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2333 slp_tree child;
2334 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2335 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2336 int j;
2338 /* Propagate hybrid down the SLP tree. */
2339 if (stype == hybrid)
2341 else if (HYBRID_SLP_STMT (stmt_vinfo))
2342 stype = hybrid;
2343 else
2345 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2346 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2347 /* If we get a pattern stmt here we have to use the LHS of the
2348 original stmt for immediate uses. */
2349 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2350 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2351 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2352 tree def;
2353 if (gimple_code (stmt) == GIMPLE_PHI)
2354 def = gimple_phi_result (stmt);
2355 else
2356 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2357 if (def)
2358 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2360 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2361 continue;
2362 use_vinfo = vinfo_for_stmt (use_stmt);
2363 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2364 && STMT_VINFO_RELATED_STMT (use_vinfo))
2365 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2366 if (!STMT_SLP_TYPE (use_vinfo)
2367 && (STMT_VINFO_RELEVANT (use_vinfo)
2368 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2369 && !(gimple_code (use_stmt) == GIMPLE_PHI
2370 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2372 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2375 "def in non-SLP stmt: ");
2376 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2378 stype = hybrid;
2383 if (stype == hybrid
2384 && !HYBRID_SLP_STMT (stmt_vinfo))
2386 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2389 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2391 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2394 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2395 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2396 vect_detect_hybrid_slp_stmts (child, i, stype);
2399 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2401 static tree
2402 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2404 walk_stmt_info *wi = (walk_stmt_info *)data;
2405 struct loop *loopp = (struct loop *)wi->info;
2407 if (wi->is_lhs)
2408 return NULL_TREE;
2410 if (TREE_CODE (*tp) == SSA_NAME
2411 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2413 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2414 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2415 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2417 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2420 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2422 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2426 return NULL_TREE;
2429 static tree
2430 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2431 walk_stmt_info *)
2433 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2434 /* If the stmt is in an SLP instance then this isn't a reason
2435 to mark use definitions in other SLP instances as hybrid. */
2436 if (! STMT_SLP_TYPE (use_vinfo)
2437 && (STMT_VINFO_RELEVANT (use_vinfo)
2438 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2439 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2440 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2442 else
2443 *handled = true;
2444 return NULL_TREE;
2447 /* Find stmts that must be both vectorized and SLPed. */
2449 void
2450 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2452 unsigned int i;
2453 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2454 slp_instance instance;
2456 if (dump_enabled_p ())
2457 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2458 "\n");
2460 /* First walk all pattern stmts in the loop and mark defs of their uses as
2461 hybrid, because immediate uses inside pattern stmts are not recorded. */
2462 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2464 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2465 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2466 gsi_next (&gsi))
2468 gimple *stmt = gsi_stmt (gsi);
2469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2470 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2472 walk_stmt_info wi;
2473 memset (&wi, 0, sizeof (wi));
2474 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2475 gimple_stmt_iterator gsi2
2476 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2477 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2478 vect_detect_hybrid_slp_1, &wi);
2479 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2480 vect_detect_hybrid_slp_2,
2481 vect_detect_hybrid_slp_1, &wi);
2486 /* Then walk the SLP instance trees marking stmts with uses in
2487 non-SLP stmts as hybrid, also propagating hybrid down the
2488 SLP tree, collecting the above info on-the-fly. */
2489 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2491 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2492 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2493 i, pure_slp);
2498 /* Initialize a bb_vec_info struct for the statements between
2499 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2501 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2502 gimple_stmt_iterator region_end_in)
2503 : vec_info (vec_info::bb, init_cost (NULL)),
2504 bb (gsi_bb (region_begin_in)),
2505 region_begin (region_begin_in),
2506 region_end (region_end_in)
2508 gimple_stmt_iterator gsi;
2510 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2511 gsi_next (&gsi))
2513 gimple *stmt = gsi_stmt (gsi);
2514 gimple_set_uid (stmt, 0);
2515 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2518 bb->aux = this;
2522 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2523 stmts in the basic block. */
2525 _bb_vec_info::~_bb_vec_info ()
2527 for (gimple_stmt_iterator si = region_begin;
2528 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2530 gimple *stmt = gsi_stmt (si);
2531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2533 if (stmt_info)
2534 /* Free stmt_vec_info. */
2535 free_stmt_vec_info (stmt);
2537 /* Reset region marker. */
2538 gimple_set_uid (stmt, -1);
2541 bb->aux = NULL;
2545 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2546 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2548 Return true if the operations are supported. */
2550 static bool
2551 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2552 slp_instance node_instance)
2554 bool dummy;
2555 int i, j;
2556 gimple *stmt;
2557 slp_tree child;
2559 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2560 return true;
2562 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2563 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2564 return false;
2566 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2568 gcc_assert (stmt_info);
2569 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2571 /* For BB vectorization vector types are assigned here.
2572 Memory accesses already got their vector type assigned
2573 in vect_analyze_data_refs. */
2574 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2575 if (bb_vinfo
2576 && ! STMT_VINFO_DATA_REF (stmt_info))
2578 gcc_assert (PURE_SLP_STMT (stmt_info));
2580 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2581 if (dump_enabled_p ())
2583 dump_printf_loc (MSG_NOTE, vect_location,
2584 "get vectype for scalar type: ");
2585 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2586 dump_printf (MSG_NOTE, "\n");
2589 tree vectype = get_vectype_for_scalar_type (scalar_type);
2590 if (!vectype)
2592 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595 "not SLPed: unsupported data-type ");
2596 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2597 scalar_type);
2598 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2600 return false;
2603 if (dump_enabled_p ())
2605 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2606 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2607 dump_printf (MSG_NOTE, "\n");
2610 gimple *sstmt;
2611 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2612 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2615 /* Calculate the number of vector statements to be created for the
2616 scalar stmts in this node. For SLP reductions it is equal to the
2617 number of vector statements in the children (which has already been
2618 calculated by the recursive call). Otherwise it is the number of
2619 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2620 VF divided by the number of elements in a vector. */
2621 if (GROUP_FIRST_ELEMENT (stmt_info)
2622 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2623 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2624 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2625 else
2627 int vf;
2628 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2629 vf = loop_vinfo->vectorization_factor;
2630 else
2631 vf = 1;
2632 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2633 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2634 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2635 = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype);
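/* E.g., with VF == 4, GROUP_SIZE == 2 and 4-element vectors this yields
4 * 2 / 4 == 2 vector stmts for the node.  */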
2638 /* Push SLP node def-type to stmt operands. */
2639 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2640 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2641 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2642 = SLP_TREE_DEF_TYPE (child);
2643 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2644 /* Restore def-types. */
2645 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2646 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2647 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2648 = vect_internal_def;
2649 if (! res)
2650 return false;
2652 return true;
2656 /* Analyze statements in SLP instances of VINFO. Return true if the
2657 operations are supported. */
2659 bool
2660 vect_slp_analyze_operations (vec_info *vinfo)
2662 slp_instance instance;
2663 int i;
2665 if (dump_enabled_p ())
2666 dump_printf_loc (MSG_NOTE, vect_location,
2667 "=== vect_slp_analyze_operations ===\n");
2669 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2671 if (!vect_slp_analyze_node_operations (vinfo,
2672 SLP_INSTANCE_TREE (instance),
2673 instance))
2675 dump_printf_loc (MSG_NOTE, vect_location,
2676 "removing SLP instance operations starting from: ");
2677 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2678 SLP_TREE_SCALAR_STMTS
2679 (SLP_INSTANCE_TREE (instance))[0], 0);
2680 vect_free_slp_instance (instance);
2681 vinfo->slp_instances.ordered_remove (i);
2683 else
2685 /* Compute the costs of the SLP instance. */
2686 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2687 i++;
2691 return !vinfo->slp_instances.is_empty ();
2695 /* Compute the scalar cost of the SLP node NODE and its children
2696 and return it. Do not account defs that are marked in LIFE and
2697 update LIFE according to uses of NODE. */
2699 static unsigned
2700 vect_bb_slp_scalar_cost (basic_block bb,
2701 slp_tree node, vec<bool, va_heap> *life)
2703 unsigned scalar_cost = 0;
2704 unsigned i;
2705 gimple *stmt;
2706 slp_tree child;
2708 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2710 unsigned stmt_cost;
2711 ssa_op_iter op_iter;
2712 def_operand_p def_p;
2713 stmt_vec_info stmt_info;
2715 if ((*life)[i])
2716 continue;
2718 /* If there is a non-vectorized use of the defs then the scalar
2719 stmt is kept live in which case we do not account it or any
2720 required defs in the SLP children in the scalar cost. This
2721 way we make the vectorization more costly when compared to
2722 the scalar cost. */
2723 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2725 imm_use_iterator use_iter;
2726 gimple *use_stmt;
2727 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2728 if (!is_gimple_debug (use_stmt)
2729 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2730 use_stmt)
2731 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2733 (*life)[i] = true;
2734 BREAK_FROM_IMM_USE_STMT (use_iter);
2737 if ((*life)[i])
2738 continue;
2740 /* Count scalar stmts only once. */
2741 if (gimple_visited_p (stmt))
2742 continue;
2743 gimple_set_visited (stmt, true);
2745 stmt_info = vinfo_for_stmt (stmt);
2746 if (STMT_VINFO_DATA_REF (stmt_info))
2748 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2749 stmt_cost = vect_get_stmt_cost (scalar_load);
2750 else
2751 stmt_cost = vect_get_stmt_cost (scalar_store);
2753 else
2754 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2756 scalar_cost += stmt_cost;
2759 auto_vec<bool, 20> subtree_life;
2760 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2762 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2764 /* Do not directly pass LIFE to the recursive call, copy it to
2765 confine changes in the callee to the current child/subtree. */
2766 subtree_life.safe_splice (*life);
2767 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2768 subtree_life.truncate (0);
2772 return scalar_cost;
2775 /* Check if vectorization of the basic block is profitable. */
2777 static bool
2778 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2780 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2781 slp_instance instance;
2782 int i;
2783 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2784 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2786 /* Calculate scalar cost. */
2787 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2789 auto_vec<bool, 20> life;
2790 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2791 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2792 SLP_INSTANCE_TREE (instance),
2793 &life);
2796 /* Unset visited flag. */
2797 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2798 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2799 gimple_set_visited (gsi_stmt (gsi), false);
2801 /* Complete the target-specific cost calculation. */
2802 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2803 &vec_inside_cost, &vec_epilogue_cost);
2805 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2807 if (dump_enabled_p ())
2809 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2810 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2811 vec_inside_cost);
2812 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2813 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2814 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2817 /* Vectorization is profitable if its cost is less than the cost of the
2818 scalar version. Note that we err on the vector side for equal cost because
2819 the cost estimate is otherwise quite pessimistic (constant uses are
2820 free on the scalar side but cost a load on the vector side for
2821 example). */
2822 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2823 return false;
2825 return true;
2828 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2829 if so and sets fatal to true if failure is independent of
2830 current_vector_size. */
2832 static bb_vec_info
2833 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2834 gimple_stmt_iterator region_end,
2835 vec<data_reference_p> datarefs, int n_stmts,
2836 bool &fatal)
2838 bb_vec_info bb_vinfo;
2839 slp_instance instance;
2840 int i;
2841 int min_vf = 2;
2843 /* The first group of checks is independent of the vector size. */
2844 fatal = true;
2846 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2848 if (dump_enabled_p ())
2849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2850 "not vectorized: too many instructions in "
2851 "basic block.\n");
2852 free_data_refs (datarefs);
2853 return NULL;
2856 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2857 if (!bb_vinfo)
2858 return NULL;
2860 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2862 /* Analyze the data references. */
2864 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2868 "not vectorized: unhandled data-ref in basic "
2869 "block.\n");
2871 delete bb_vinfo;
2872 return NULL;
2875 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2877 if (dump_enabled_p ())
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2879 "not vectorized: not enough data-refs in "
2880 "basic block.\n");
2882 delete bb_vinfo;
2883 return NULL;
2886 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2890 "not vectorized: unhandled data access in "
2891 "basic block.\n");
2893 delete bb_vinfo;
2894 return NULL;
2897 /* If there are no grouped stores in the region there is no need
2898 to continue with pattern recog as vect_analyze_slp will fail
2899 anyway. */
2900 if (bb_vinfo->grouped_stores.is_empty ())
2902 if (dump_enabled_p ())
2903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2904 "not vectorized: no grouped stores in "
2905 "basic block.\n");
2907 delete bb_vinfo;
2908 return NULL;
2911 /* The rest of the analysis below depends on the vector size in some way, so from here on failures are not fatal. */
2912 fatal = false;
2914 vect_pattern_recog (bb_vinfo);
2916 /* Check the SLP opportunities in the basic block, analyze and build SLP
2917 trees. */
2918 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2920 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2923 "Failed to SLP the basic block.\n");
2924 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2925 "not vectorized: failed to find SLP opportunities "
2926 "in basic block.\n");
2929 delete bb_vinfo;
2930 return NULL;
2933 vect_record_base_alignments (bb_vinfo);
2935 /* Analyze and verify the alignment of data references and the
2936 dependence in the SLP instances. */
2937 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2939 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2940 || ! vect_slp_analyze_instance_dependence (instance))
2942 dump_printf_loc (MSG_NOTE, vect_location,
2943 "removing SLP instance operations starting from: ");
2944 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2945 SLP_TREE_SCALAR_STMTS
2946 (SLP_INSTANCE_TREE (instance))[0], 0);
2947 vect_free_slp_instance (instance);
2948 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2949 continue;
2952 /* Mark all the statements that we want to vectorize as pure SLP and
2953 relevant. */
2954 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2955 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2957 i++;
2959 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2961 delete bb_vinfo;
2962 return NULL;
2965 if (!vect_slp_analyze_operations (bb_vinfo))
2967 if (dump_enabled_p ())
2968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2969 "not vectorized: bad operation in basic block.\n");
2971 delete bb_vinfo;
2972 return NULL;
2975 /* Cost model: check if the vectorization is worthwhile. */
2976 if (!unlimited_cost_model (NULL)
2977 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2979 if (dump_enabled_p ())
2980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2981 "not vectorized: vectorization is not "
2982 "profitable.\n");
2984 delete bb_vinfo;
2985 return NULL;
2988 if (dump_enabled_p ())
2989 dump_printf_loc (MSG_NOTE, vect_location,
2990 "Basic block will be vectorized using SLP\n");
2992 return bb_vinfo;
2996 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2997 true if anything in the basic-block was vectorized. */
2999 bool
3000 vect_slp_bb (basic_block bb)
3002 bb_vec_info bb_vinfo;
3003 gimple_stmt_iterator gsi;
3004 unsigned int vector_sizes;
3005 bool any_vectorized = false;
3007 if (dump_enabled_p ())
3008 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
3010 /* Autodetect first vector size we try. */
3011 current_vector_size = 0;
3012 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3014 gsi = gsi_start_bb (bb);
3016 while (1)
3018 if (gsi_end_p (gsi))
3019 break;
3021 gimple_stmt_iterator region_begin = gsi;
3022 vec<data_reference_p> datarefs = vNULL;
3023 int insns = 0;
3025 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3027 gimple *stmt = gsi_stmt (gsi);
3028 if (is_gimple_debug (stmt))
3029 continue;
3030 insns++;
3032 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3033 vect_location = gimple_location (stmt);
3035 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3036 break;
3039 /* Skip leading unhandled stmts. */
3040 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3042 gsi_next (&gsi);
3043 continue;
3046 gimple_stmt_iterator region_end = gsi;
3048 bool vectorized = false;
3049 bool fatal = false;
3050 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3051 datarefs, insns, fatal);
3052 if (bb_vinfo
3053 && dbg_cnt (vect_slp))
3055 if (dump_enabled_p ())
3056 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3058 vect_schedule_slp (bb_vinfo);
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_NOTE, vect_location,
3062 "basic block part vectorized\n");
3064 vectorized = true;
3066 delete bb_vinfo;
3068 any_vectorized |= vectorized;
3070 vector_sizes &= ~current_vector_size;
3071 if (vectorized
3072 || vector_sizes == 0
3073 || current_vector_size == 0
3074 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3075 vector sizes will fail, do not bother iterating. */
3076 || fatal)
3078 if (gsi_end_p (region_end))
3079 break;
3081 /* Skip the unhandled stmt. */
3082 gsi_next (&gsi);
3084 /* And reset vector sizes. */
3085 current_vector_size = 0;
3086 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3088 else
3090 /* Try the next biggest vector size. */
3091 current_vector_size = 1 << floor_log2 (vector_sizes);
3092 if (dump_enabled_p ())
3093 dump_printf_loc (MSG_NOTE, vect_location,
3094 "***** Re-trying analysis with "
3095 "vector size %d\n", current_vector_size);
3097 /* Start over. */
3098 gsi = region_begin;
3102 return any_vectorized;
3106 /* Return true if the vector type of the boolean constant that is the
3107 OPNUM operand in statement STMT is a boolean vector. */
3109 static bool
3110 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3112 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3113 enum tree_code code = gimple_expr_code (stmt);
3114 tree op, vectype;
3115 gimple *def_stmt;
3116 enum vect_def_type dt;
3118 /* For comparisons and COND_EXPR the type is chosen depending
3119 on the other comparison operand. */
3120 if (TREE_CODE_CLASS (code) == tcc_comparison)
3122 if (opnum)
3123 op = gimple_assign_rhs1 (stmt);
3124 else
3125 op = gimple_assign_rhs2 (stmt);
3127 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3128 &dt, &vectype))
3129 gcc_unreachable ();
3131 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3134 if (code == COND_EXPR)
3136 tree cond = gimple_assign_rhs1 (stmt);
3138 if (TREE_CODE (cond) == SSA_NAME)
3139 op = cond;
3140 else if (opnum)
3141 op = TREE_OPERAND (cond, 1);
3142 else
3143 op = TREE_OPERAND (cond, 0);
3145 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3146 &dt, &vectype))
3147 gcc_unreachable ();
3149 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3152 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3156 /* For constant and loop invariant defs of SLP_NODE this function returns
3157 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3158 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3159 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3163 static void
3164 vect_get_constant_vectors (tree op, slp_tree slp_node,
3165 vec<tree> *vec_oprnds,
3166 unsigned int op_num, unsigned int number_of_vectors)
3168 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3169 gimple *stmt = stmts[0];
3170 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3171 unsigned nunits;
3172 tree vec_cst;
3173 unsigned j, number_of_places_left_in_vector;
3174 tree vector_type;
3175 tree vop;
3176 int group_size = stmts.length ();
3177 unsigned int vec_num, i;
3178 unsigned number_of_copies = 1;
3179 vec<tree> voprnds;
3180 voprnds.create (number_of_vectors);
3181 bool constant_p, is_store;
3182 tree neutral_op = NULL;
3183 enum tree_code code = gimple_expr_code (stmt);
3184 gimple_seq ctor_seq = NULL;
3186 /* Check if vector type is a boolean vector. */
3187 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3188 && vect_mask_constant_operand_p (stmt, op_num))
3189 vector_type
3190 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3191 else
3192 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3193 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3195 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3197 is_store = true;
3198 op = gimple_assign_rhs1 (stmt);
3200 else
3201 is_store = false;
3203 gcc_assert (op);
3205 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3206 created vectors. It is greater than 1 if unrolling is performed.
3208 For example, we have two scalar operands, s1 and s2 (e.g., group of
3209 strided accesses of size two), while NUNITS is four (i.e., four scalars
3210 of this type can be packed in a vector). The output vector will contain
3211 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3212 will be 2).
3214 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3215 containing the operands.
3217 For example, NUNITS is four as before, and the group size is 8
3218 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3219 {s5, s6, s7, s8}. */
3221 number_of_copies = nunits * number_of_vectors / group_size;
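/* Tying the formula to the examples above: for GROUP_SIZE == 2, NUNITS
== 4 and one vector to create this gives 4 * 1 / 2 == 2 copies of
{s1, s2}; for GROUP_SIZE == 8 and two vectors it gives 4 * 2 / 8 == 1.  */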
3223 number_of_places_left_in_vector = nunits;
3224 constant_p = true;
3225 auto_vec<tree, 32> elts (nunits);
3226 elts.quick_grow (nunits);
3227 bool place_after_defs = false;
3228 for (j = 0; j < number_of_copies; j++)
3230 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3232 if (is_store)
3233 op = gimple_assign_rhs1 (stmt);
3234 else
3236 switch (code)
3238 case COND_EXPR:
3240 tree cond = gimple_assign_rhs1 (stmt);
3241 if (TREE_CODE (cond) == SSA_NAME)
3242 op = gimple_op (stmt, op_num + 1);
3243 else if (op_num == 0 || op_num == 1)
3244 op = TREE_OPERAND (cond, op_num);
3245 else
3247 if (op_num == 2)
3248 op = gimple_assign_rhs2 (stmt);
3249 else
3250 op = gimple_assign_rhs3 (stmt);
3253 break;
3255 case CALL_EXPR:
3256 op = gimple_call_arg (stmt, op_num);
3257 break;
3259 case LSHIFT_EXPR:
3260 case RSHIFT_EXPR:
3261 case LROTATE_EXPR:
3262 case RROTATE_EXPR:
3263 op = gimple_op (stmt, op_num + 1);
3264 /* Unlike the other binary operators, shifts/rotates have
3265 the shift count being int, instead of the same type as
3266 the lhs, so make sure the scalar is the right type if
3267 we are dealing with vectors of
3268 long long/long/short/char. */
3269 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3270 op = fold_convert (TREE_TYPE (vector_type), op);
3271 break;
3273 default:
3274 op = gimple_op (stmt, op_num + 1);
3275 break;
3279 /* Create 'vect_ = {op0,op1,...,opn}'. */
3280 number_of_places_left_in_vector--;
3281 tree orig_op = op;
3282 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3284 if (CONSTANT_CLASS_P (op))
3286 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3288 /* Can't use VIEW_CONVERT_EXPR for booleans because
3289 of possibly different sizes of scalar value and
3290 vector element. */
3291 if (integer_zerop (op))
3292 op = build_int_cst (TREE_TYPE (vector_type), 0);
3293 else if (integer_onep (op))
3294 op = build_all_ones_cst (TREE_TYPE (vector_type));
3295 else
3296 gcc_unreachable ();
3298 else
3299 op = fold_unary (VIEW_CONVERT_EXPR,
3300 TREE_TYPE (vector_type), op);
3301 gcc_assert (op && CONSTANT_CLASS_P (op));
3303 else
3305 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3306 gimple *init_stmt;
3307 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3309 tree true_val
3310 = build_all_ones_cst (TREE_TYPE (vector_type));
3311 tree false_val
3312 = build_zero_cst (TREE_TYPE (vector_type));
3313 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3314 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3315 op, true_val,
3316 false_val);
3318 else
3320 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3321 op);
3322 init_stmt
3323 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3324 op);
3326 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3327 op = new_temp;
3330 elts[number_of_places_left_in_vector] = op;
3331 if (!CONSTANT_CLASS_P (op))
3332 constant_p = false;
3333 if (TREE_CODE (orig_op) == SSA_NAME
3334 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3335 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3336 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3337 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3338 place_after_defs = true;
3340 if (number_of_places_left_in_vector == 0)
3342 if (constant_p)
3343 vec_cst = build_vector (vector_type, elts);
3344 else
3346 vec<constructor_elt, va_gc> *v;
3347 unsigned k;
3348 vec_alloc (v, nunits);
3349 for (k = 0; k < nunits; ++k)
3350 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3351 vec_cst = build_constructor (vector_type, v);
3353 tree init;
3354 gimple_stmt_iterator gsi;
3355 if (place_after_defs)
3357 gsi = gsi_for_stmt
3358 (vect_find_last_scalar_stmt_in_slp (slp_node));
3359 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3361 else
3362 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3363 if (ctor_seq != NULL)
3365 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3366 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3367 GSI_SAME_STMT);
3368 ctor_seq = NULL;
3370 voprnds.quick_push (init);
3371 place_after_defs = false;
3372 number_of_places_left_in_vector = nunits;
3373 constant_p = true;
3378 /* Since the vectors are created in reverse order, we have to reverse
3379 them here. */
3380 vec_num = voprnds.length ();
3381 for (j = vec_num; j != 0; j--)
3383 vop = voprnds[j - 1];
3384 vec_oprnds->quick_push (vop);
3387 voprnds.release ();
3389 /* If VF is greater than the unrolling factor needed for the SLP
3390 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3391 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3392 to replicate the vectors. */
3393 while (number_of_vectors > vec_oprnds->length ())
3395 tree neutral_vec = NULL;
3397 if (neutral_op)
3399 if (!neutral_vec)
3400 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3402 vec_oprnds->quick_push (neutral_vec);
3404 else
3406 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3407 vec_oprnds->quick_push (vop);
3413 /* Get vectorized definitions from SLP_NODE that contains the corresponding
3414 vectorized def-stmts. */
3416 static void
3417 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3419 tree vec_oprnd;
3420 gimple *vec_def_stmt;
3421 unsigned int i;
3423 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3425 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3427 gcc_assert (vec_def_stmt);
3428 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3429 vec_oprnd = gimple_phi_result (vec_def_stmt);
3430 else
3431 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3432 vec_oprnds->quick_push (vec_oprnd);
3437 /* Get vectorized definitions for SLP_NODE.
3438 If the scalar definitions are loop invariants or constants, collect them and
3439 call vect_get_constant_vectors() to create vector stmts.
3440 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3441 must be stored in the corresponding child of SLP_NODE, and we call
3442 vect_get_slp_vect_defs () to retrieve them. */
3444 void
3445 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3446 vec<vec<tree> > *vec_oprnds)
3448 gimple *first_stmt;
3449 int number_of_vects = 0, i;
3450 unsigned int child_index = 0;
3451 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3452 slp_tree child = NULL;
3453 vec<tree> vec_defs;
3454 tree oprnd;
3455 bool vectorized_defs;
3457 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3458 FOR_EACH_VEC_ELT (ops, i, oprnd)
3460 /* For each operand we check if it has vectorized definitions in a child
3461 node or we need to create them (for invariants and constants). We
3462 check if the LHS of the first stmt of the next child matches OPRND.
3463 If it does, we found the correct child. Otherwise, we call
3464 vect_get_constant_vectors () and do not advance CHILD_INDEX, in order
3465 to check this child node for the next operand. */
3466 vectorized_defs = false;
3467 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3469 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3471 /* We have to check both pattern and original def, if available. */
3472 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3474 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3475 gimple *related
3476 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3477 tree first_def_op;
3479 if (gimple_code (first_def) == GIMPLE_PHI)
3480 first_def_op = gimple_phi_result (first_def);
3481 else
3482 first_def_op = gimple_get_lhs (first_def);
3483 if (operand_equal_p (oprnd, first_def_op, 0)
3484 || (related
3485 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3487 /* The number of vector defs is determined by the number of
3488 vector statements in the node from which we get those
3489 statements. */
3490 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3491 vectorized_defs = true;
3492 child_index++;
3495 else
3496 child_index++;
3499 if (!vectorized_defs)
3501 if (i == 0)
3503 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3504 /* Number of vector stmts was calculated according to LHS in
3505 vect_schedule_slp_instance (), fix it by replacing LHS with
3506 RHS, if necessary. See vect_get_smallest_scalar_type () for
3507 details. */
3508 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3509 &rhs_size_unit);
3510 if (rhs_size_unit != lhs_size_unit)
3512 number_of_vects *= rhs_size_unit;
3513 number_of_vects /= lhs_size_unit;
3518 /* Allocate memory for vectorized defs. */
3519 vec_defs = vNULL;
3520 vec_defs.create (number_of_vects);
3522 /* For reduction defs we call vect_get_constant_vectors (), since we are
3523 looking for initial loop invariant values. */
3524 if (vectorized_defs)
3525 /* The defs are already vectorized. */
3526 vect_get_slp_vect_defs (child, &vec_defs);
3527 else
3528 /* Build vectors from scalar defs. */
3529 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3530 number_of_vects);
3532 vec_oprnds->quick_push (vec_defs);
3536 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3537 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3538 permute statements for the SLP node NODE of the SLP instance
3539 SLP_NODE_INSTANCE. */
3541 bool
3542 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3543 gimple_stmt_iterator *gsi, int vf,
3544 slp_instance slp_node_instance, bool analyze_only,
3545 unsigned *n_perms)
3547 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3548 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3549 tree mask_element_type = NULL_TREE, mask_type;
3550 int nunits, vec_index = 0;
3551 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3552 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3553 int mask_element;
3554 machine_mode mode;
3556 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3557 return false;
3559 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3561 mode = TYPE_MODE (vectype);
3563 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3564 same size as the vector element being permuted. */
3565 mask_element_type = lang_hooks.types.type_for_mode
3566 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3567 mask_type = get_vectype_for_scalar_type (mask_element_type);
3568 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3569 auto_vec_perm_indices mask (nunits);
3570 mask.quick_grow (nunits);
3572 /* Initialize the vect stmts of NODE to properly insert the generated
3573 stmts later. */
3574 if (! analyze_only)
3575 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3576 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3577 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3579 /* Generate permutation masks for every NODE. The number of masks for
3580 each NODE is equal to GROUP_SIZE.
3581 E.g., we have a group of three nodes with three loads from the same
3582 location in each node, and the vector size is 4. I.e., we have an
3583 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3584 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3585 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3588 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3589 The last mask is illegal since we assume two operands for the permute
3590 operation, and the mask element values can't be outside that range.
3591 Hence, the last mask must be converted into {2,5,5,5}.
3592 For the first two permutations we need the first and the second input
3593 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3594 we need the second and the third vectors: {b1,c1,a2,b2} and
3595 {c2,a3,b3,c3}. */
3597 int vect_stmts_counter = 0;
3598 int index = 0;
3599 int first_vec_index = -1;
3600 int second_vec_index = -1;
3601 bool noop_p = true;
3602 *n_perms = 0;
3604 for (int j = 0; j < vf; j++)
3606 for (int k = 0; k < group_size; k++)
3608 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3609 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3610 vec_index = i / nunits;
3611 mask_element = i % nunits;
3612 if (vec_index == first_vec_index
3613 || first_vec_index == -1)
3615 first_vec_index = vec_index;
3617 else if (vec_index == second_vec_index
3618 || second_vec_index == -1)
3620 second_vec_index = vec_index;
3621 mask_element += nunits;
3623 else
3625 if (dump_enabled_p ())
3627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3628 "permutation requires at "
3629 "least three vectors ");
3630 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3631 stmt, 0);
3633 gcc_assert (analyze_only);
3634 return false;
3637 gcc_assert (mask_element >= 0
3638 && mask_element < 2 * nunits);
3639 if (mask_element != index)
3640 noop_p = false;
3641 mask[index++] = mask_element;
3643 if (index == nunits)
3645 if (! noop_p
3646 && ! can_vec_perm_p (mode, false, &mask))
3648 if (dump_enabled_p ())
3650 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3651 vect_location,
3652 "unsupported vect permute { ");
3653 for (i = 0; i < nunits; ++i)
3654 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3655 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3657 gcc_assert (analyze_only);
3658 return false;
3661 if (! noop_p)
3662 ++*n_perms;
3664 if (!analyze_only)
3666 tree mask_vec = NULL_TREE;
3668 if (! noop_p)
3670 auto_vec<tree, 32> mask_elts (nunits);
3671 for (int l = 0; l < nunits; ++l)
3672 mask_elts.quick_push (build_int_cst (mask_element_type,
3673 mask[l]));
3674 mask_vec = build_vector (mask_type, mask_elts);
3677 if (second_vec_index == -1)
3678 second_vec_index = first_vec_index;
3680 /* Generate the permute statement if necessary. */
3681 tree first_vec = dr_chain[first_vec_index];
3682 tree second_vec = dr_chain[second_vec_index];
3683 gimple *perm_stmt;
3684 if (! noop_p)
3686 tree perm_dest
3687 = vect_create_destination_var (gimple_assign_lhs (stmt),
3688 vectype);
3689 perm_dest = make_ssa_name (perm_dest);
3690 perm_stmt = gimple_build_assign (perm_dest,
3691 VEC_PERM_EXPR,
3692 first_vec, second_vec,
3693 mask_vec);
3694 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3696 else
3697 /* If mask was NULL_TREE generate the requested
3698 identity transform. */
3699 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3701 /* Store the vector statement in NODE. */
3702 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3705 index = 0;
3706 first_vec_index = -1;
3707 second_vec_index = -1;
3708 noop_p = true;
3713 return true;
3716 typedef hash_map <vec <gimple *>, slp_tree,
3717 simple_hashmap_traits <bst_traits, slp_tree> >
3718 scalar_stmts_to_slp_tree_map_t;
3720 /* Vectorize SLP instance tree in postorder. */
3722 static bool
3723 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3724 scalar_stmts_to_slp_tree_map_t *bst_map)
3726 gimple *stmt;
3727 bool grouped_store, is_store;
3728 gimple_stmt_iterator si;
3729 stmt_vec_info stmt_info;
3730 unsigned int group_size;
3731 tree vectype;
3732 int i, j;
3733 slp_tree child;
3735 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3736 return false;
3738 /* See if we have already vectorized the same set of stmts and reuse their
3739 vectorized stmts. */
3740 slp_tree &leader
3741 = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
3742 if (leader)
3744 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
3745 return false;
3748 leader = node;
3749 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3750 vect_schedule_slp_instance (child, instance, bst_map);
3752 /* Push SLP node def-type to stmts. */
3753 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3754 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3755 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3756 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3758 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3759 stmt_info = vinfo_for_stmt (stmt);
3761 /* VECTYPE is the type of the destination. */
3762 vectype = STMT_VINFO_VECTYPE (stmt_info);
3763 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3765 if (!SLP_TREE_VEC_STMTS (node).exists ())
3766 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3768 if (dump_enabled_p ())
3770 dump_printf_loc (MSG_NOTE, vect_location,
3771 "------>vectorizing SLP node starting from: ");
3772 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3775 /* Vectorized stmts go before the last scalar stmt which is where
3776 all uses are ready. */
3777 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3779 /* Mark the first element of the reduction chain as reduction to properly
3780 transform the node. In the analysis phase only the last element of the
3781 chain is marked as reduction. */
3782 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3783 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3785 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3786 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      auto_vec_perm_indices mask (group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask.quick_push (1);
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask.quick_push (0);
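
      /* For example (illustrative, not taken from a particular testcase):
         a group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } yields
         code0 == PLUS_EXPR, ocode == MINUS_EXPR and mask == { 0, 1, 0, 1 },
         i.e. lane i of the merged result is taken from the CODE0 variant
         when mask[i] == 0 and from the OCODE variant when mask[i] == 1.  */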
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
              auto_vec<tree, 32> melts (nunits);
              for (l = 0; l < nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
                  melts.quick_push (t);
                }
              tmask = build_vector (mvectype, melts);
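
              /* Each mask element is either l (take lane l from the
                 CODE0 vector in v0) or nunits + l (take lane l from the
                 OCODE vector in v1), so the VEC_PERM_EXPR below acts as
                 a lane-wise merge of the two vectorized operations.  */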

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match the target's
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */
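
/* For instance (illustrative only): once the vector variant of a call
   like  _1 = sqrtf (x_2);  has been generated, the scalar call is
   rewritten to  _1 = 0.0;  leaving a trivially dead assignment behind.  */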

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
        = new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, bst_map);
      delete bst_map;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we could as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);
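
      /* The scalar stores of the group were replaced by vector stores
         and are removed explicitly below, presumably because they have
         memory side effects and thus are not candidates for plain DCE.  */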

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;