gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
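/* Illustrative example (editorial addition, not part of the original
   source): for the group of two isomorphic scalar stmts

     x0 = a0 + b0;
     x1 = a1 + b1;

   vect_create_new_slp_node receives both assignments and computes
   nops = gimple_num_ops () - 1 == 2, so the node reserves room for two
   children, one per operand position.  For a COND_EXPR the embedded
   comparison contributes one extra operand, hence the nops++ above.  */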
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
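/* Illustrative example (editorial addition): for the two-stmt group

     x0 = a0 + b0;
     x1 = a1 + b1;

   two slp_oprnd_info entries are used; entry 0 collects the def stmts of
   a0 and a1, entry 1 those of b0 and b1.  The first_* fields remember the
   def-type and type seen for the first stmt so that the defs of the
   remaining stmts can be checked against them.  */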
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
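/* Illustrative example (editorial addition): for an interleaving chain
   s0 -> s1 -> s2 with GROUP_GAP (s1) == 2 and GROUP_GAP (s2) == 1, the
   function returns 0 for s0, 2 for s1 and 3 for s2, i.e. the element
   position including any gaps in the access pattern.  */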
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first
	 stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}
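/* Illustrative example (editorial addition): with *SWAP == 1 the stmt

     x = a < b ? c : d;

   is matched as if it were  x = b > a ? c : d  (map {1, 0, 2, 3} exchanges
   the two comparison operands), and with *SWAP == 2 it is matched as
   x = a >= b ? d : c  (map {0, 1, 3, 2} exchanges the two arms, to go
   with the inverted comparison code).  */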
/* Check whether the scalar stmts STMTS are isomorphic to each other, need
   no data permutation and are of supported types of operation.  Return
   true if so; otherwise return false and indicate in *MATCHES which stmts
   are not isomorphic to the first one.  If MATCHES[0] is false then this
   indicates the comparison could not be carried out or the stmts will
   never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* If populating the vector type requires unrolling then fail
	 before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
	  && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unrolling required "
			   "in basic block SLP\n");
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
	*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
	= XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
	{
	  sel[i] = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
	}
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
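/* Illustrative example (editorial addition): for the addsub-style group

     x0 = a0 + b0;
     x1 = a1 - b1;

   with e.g. a four-element vector type, both a PLUS and a MINUS vector
   stmt are generated and the permute mask built above is {0, 5, 2, 7}:
   even lanes come from the PLUS result, odd lanes (indexes offset by
   TYPE_VECTOR_SUBPARTS) from the MINUS result.  */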
/* Recursively build an SLP tree for the group of stmts STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require
   data permutation or are of unsupported types of operation.  Otherwise
   return the built SLP tree; on failure *MATCHES indicates which stmts
   did not match the first one.  */

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Induction from different IVs is not supported.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) == vect_induction_def)
	FOR_EACH_VEC_ELT (stmts, i, stmt)
	  if (stmt != stmts[0])
	    return NULL;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of unsuccessful permutes was not
	     more than a cut-off, as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
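/* Illustrative example (editorial addition): for the basic block

     t0 = b[0] + c[0];  a[0] = t0;
     t1 = b[1] + c[1];  a[1] = t1;

   vect_build_slp_tree starting from the two stores produces a root node
   holding the stores, whose single internal child holds the two
   additions; that child in turn has two leaf children holding the loads
   from b[] and from c[].  The load nodes terminate the recursion and are
   collected in LOADS.  */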
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
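/* Illustrative example (editorial addition): with PERMUTATION == {2, 0, 1}
   the stmt at index 0 moves to index 2, the stmt at index 1 to index 0 and
   the stmt at index 2 to index 1 (tmp_stmts[permutation[i]] = stmt), in
   every node of the tree.  */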
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (unrolling_factor == 1
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
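/* Illustrative example (editorial addition): if every load node of a
   reduction-chain instance uses the permutation {1, 0, 3, 2}, the checks
   above find it duplicate-free and gap-free, the scalar stmts of all nodes
   are reordered accordingly and the load permutations can then be dropped
   (unless gaps or unrolling still require them).  */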
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  unsigned n_perms;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load
	      (node, vNULL, NULL,
	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
	       &n_perms))
      return false;

  return true;
}
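/* Illustrative example (editorial addition): in basic-block SLP a node
   loading a[2], a[3] from the interleaving chain a[0] ... a[3] forms a
   subchain (consecutive elements with GROUP_GAP == 1), so its load
   permutation is dropped; a node loading a[3], a[1] keeps a permutation,
   which then has to pass the gap check and be generatable by the target
   (assuming the whole chain fits in one vector).  */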
/* Find the last scalar stmt in the SLP tree rooted at NODE (e.g. the last
   store of an instance); for pattern stmts the position of the original
   stmt is used.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
		 redundancies as well as loads that are later dead.  */
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + nunits - 1) / nunits);
	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
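/* Illustrative example (editorial addition) for the load-count adjustment
   above: with GROUP_SIZE == 8, nunits == 4 and a load permutation using
   only elements 0 ... 3, no element of the second vector is live, so
   ncopies_for_cost drops from two vector loads to one (before scaling by
   the unrolling factor).  */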
/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
					   (TREE_TYPE (gimple_assign_rhs1 (stmt))));
	  break;
	default:;
	}
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
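  /* For example (editorial addition): with nunits == 4 and group_size == 6,
     least_common_multiple (4, 6) == 12, so ncopies_for_cost == 3 vector
     stmts cover two copies of the six-stmt group.  */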
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2)) =
    GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
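/* Illustrative example (editorial addition, assuming the original group had
   GROUP_GAP == 0): splitting a store group of size 7 with GROUP1_SIZE == 4
   yields a first group of size 4 with GROUP_GAP == 3 (it must skip the
   second group) and a second group of size 3 with GROUP_GAP == 4 (the
   original gap plus the first group's size).  */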
1829 /* Analyze an SLP instance starting from a group of grouped stores. Call
1830 vect_build_slp_tree to build a tree of packed stmts if possible.
1831 Return FALSE if it's impossible to SLP any stmt in the loop. */
1833 static bool
1834 vect_analyze_slp_instance (vec_info *vinfo,
1835 gimple *stmt, unsigned max_tree_size)
1837 slp_instance new_instance;
1838 slp_tree node;
1839 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1840 unsigned int unrolling_factor = 1, nunits;
1841 tree vectype, scalar_type = NULL_TREE;
1842 gimple *next;
1843 unsigned int i;
1844 unsigned int max_nunits = 0;
1845 vec<slp_tree> loads;
1846 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1847 vec<gimple *> scalar_stmts;
1849 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1851 if (dr)
1853 scalar_type = TREE_TYPE (DR_REF (dr));
1854 vectype = get_vectype_for_scalar_type (scalar_type);
1856 else
1858 gcc_assert (is_a <loop_vec_info> (vinfo));
1859 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1862 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1864 else
1866 gcc_assert (is_a <loop_vec_info> (vinfo));
1867 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1868 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1871 if (!vectype)
1873 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1876 "Build SLP failed: unsupported data-type ");
1877 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1878 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1881 return false;
1883 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1885 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1886 scalar_stmts.create (group_size);
1887 next = stmt;
1888 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1890 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1891 while (next)
1893 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1894 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1895 scalar_stmts.safe_push (
1896 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1897 else
1898 scalar_stmts.safe_push (next);
1899 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1901 /* Mark the first element of the reduction chain as reduction to properly
1902 transform the node. In the reduction analysis phase only the last
1903 element of the chain is marked as reduction. */
1904 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1905 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1907 else
1909 /* Collect reduction statements. */
1910 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1911 for (i = 0; reductions.iterate (i, &next); i++)
1912 scalar_stmts.safe_push (next);
1915 loads.create (group_size);
1917 /* Build the tree for the SLP instance. */
1918 bool *matches = XALLOCAVEC (bool, group_size);
1919 unsigned npermutes = 0;
1920 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1921 &max_nunits, &loads, matches, &npermutes,
1922 NULL, max_tree_size);
1923 if (node != NULL)
1925 /* Calculate the unrolling factor based on the smallest type. */
1926 unrolling_factor
1927 = least_common_multiple (max_nunits, group_size) / group_size;
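/* For instance (hypothetical numbers), max_nunits == 4 and group_size == 6
   give least_common_multiple (4, 6) / 6 == 12 / 6 == 2, i.e. two scalar
   iterations of the group are needed to fill whole vectors.  */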
1929 if (unrolling_factor != 1
1930 && is_a <bb_vec_info> (vinfo))
1933 if (max_nunits > group_size)
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1936 "Build SLP failed: store group "
1937 "size not a multiple of the vector size "
1938 "in basic block SLP\n");
1939 vect_free_slp_tree (node);
1940 loads.release ();
1941 return false;
1943 /* Fatal mismatch. */
1944 matches[group_size/max_nunits * max_nunits] = false;
1945 vect_free_slp_tree (node);
1946 loads.release ();
1948 else
1950 /* Create a new SLP instance. */
1951 new_instance = XNEW (struct _slp_instance);
1952 SLP_INSTANCE_TREE (new_instance) = node;
1953 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1954 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1955 SLP_INSTANCE_LOADS (new_instance) = loads;
1957 /* Compute the load permutation. */
1958 slp_tree load_node;
1959 bool loads_permuted = false;
1960 FOR_EACH_VEC_ELT (loads, i, load_node)
1962 vec<unsigned> load_permutation;
1963 int j;
1964 gimple *load, *first_stmt;
1965 bool this_load_permuted = false;
1966 load_permutation.create (group_size);
1967 first_stmt = GROUP_FIRST_ELEMENT
1968 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1969 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1971 int load_place = vect_get_place_in_interleaving_chain
1972 (load, first_stmt);
1973 gcc_assert (load_place != -1);
1974 if (load_place != j)
1975 this_load_permuted = true;
1976 load_permutation.safe_push (load_place);
1978 if (!this_load_permuted
1979 /* The load requires permutation when unrolling exposes
1980 a gap either because the group is larger than the SLP
1981 group-size or because there is a gap between the groups. */
1982 && (unrolling_factor == 1
1983 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1984 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
1986 load_permutation.release ();
1987 continue;
1989 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1990 loads_permuted = true;
1993 if (loads_permuted)
1995 if (!vect_supported_load_permutation_p (new_instance))
1997 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2000 "Build SLP failed: unsupported load "
2001 "permutation ");
2002 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2003 TDF_SLIM, stmt, 0);
2005 vect_free_slp_instance (new_instance);
2006 return false;
2010 /* If the loads and stores can be handled with load/store-lanes
2011 instructions do not generate this SLP instance. */
2012 if (is_a <loop_vec_info> (vinfo)
2013 && loads_permuted
2014 && dr && vect_store_lanes_supported (vectype, group_size))
2016 slp_tree load_node;
2017 FOR_EACH_VEC_ELT (loads, i, load_node)
2019 gimple *first_stmt = GROUP_FIRST_ELEMENT
2020 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2021 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2022 /* Use SLP for strided accesses (or if we
2023 can't load-lanes). */
2024 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2025 || ! vect_load_lanes_supported
2026 (STMT_VINFO_VECTYPE (stmt_vinfo),
2027 GROUP_SIZE (stmt_vinfo)))
2028 break;
2030 if (i == loads.length ())
2032 if (dump_enabled_p ())
2033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2034 "Built SLP cancelled: can use "
2035 "load/store-lanes\n");
2036 vect_free_slp_instance (new_instance);
2037 return false;
2041 vinfo->slp_instances.safe_push (new_instance);
2043 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE, vect_location,
2046 "Final SLP tree for instance:\n");
2047 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2050 return true;
2053 else
2055 /* Failed to SLP. */
2056 /* Free the allocated memory. */
2057 scalar_stmts.release ();
2058 loads.release ();
2061 /* For basic block SLP, try to break the group up into multiples of the
2062 vector size. */
2063 if (is_a <bb_vec_info> (vinfo)
2064 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2065 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2067 /* We consider breaking the group only on VF boundaries from the existing
2068 start. */
2069 for (i = 0; i < group_size; i++)
2070 if (!matches[i]) break;
2072 if (i >= nunits && i < group_size)
2074 /* Split into two groups at the first vector boundary before i. */
2075 gcc_assert ((nunits & (nunits - 1)) == 0);
2076 unsigned group1_size = i & ~(nunits - 1);
2078 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2079 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2080 /* If the first non-match was in the middle of a vector,
2081 skip the rest of that vector. */
2082 if (group1_size < i)
2084 i = group1_size + nunits;
2085 if (i < group_size)
2086 rest = vect_split_slp_store_group (rest, nunits);
2088 if (i < group_size)
2089 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2090 return res;
2092 /* Even though the first vector did not all match, we might be able to SLP
2093 (some) of the remainder. FORNOW ignore this possibility. */
2096 return false;
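/* As an illustration of the splitting logic above (hypothetical numbers):
   with nunits == 4, group_size == 10 and the first mismatch at i == 6,
   group1_size == (6 & ~3) == 4, so the stores are split into {0..3} and
   {4..9} and the first group is re-analyzed.  Because group1_size < i the
   vector containing the mismatch is then skipped: i becomes 4 + 4 == 8,
   {4..9} is split into {4..7} and {8, 9}, and only {8, 9} is re-analyzed.  */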
2100 /* Check if there are stmts in the loop that can be vectorized using SLP.
2101 Build SLP trees of packed scalar stmts if SLP is possible. */
2103 bool
2104 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2106 unsigned int i;
2107 gimple *first_element;
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2112 /* Find SLP sequences starting from groups of grouped stores. */
2113 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2114 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2116 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2118 if (loop_vinfo->reduction_chains.length () > 0)
2120 /* Find SLP sequences starting from reduction chains. */
2121 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2122 if (! vect_analyze_slp_instance (vinfo, first_element,
2123 max_tree_size))
2125 /* Dissolve reduction chain group. */
2126 gimple *next, *stmt = first_element;
2127 while (stmt)
2129 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2130 next = GROUP_NEXT_ELEMENT (vinfo);
2131 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2132 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2133 stmt = next;
2135 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2136 = vect_internal_def;
2140 /* Find SLP sequences starting from groups of reductions. */
2141 if (loop_vinfo->reductions.length () > 1)
2142 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2143 max_tree_size);
2146 return true;
2150 /* For each possible SLP instance decide whether to SLP it and calculate the
2151 overall unrolling factor needed to SLP the loop. Return TRUE if we decided to
2152 SLP at least one instance. */
2154 bool
2155 vect_make_slp_decision (loop_vec_info loop_vinfo)
2157 unsigned int i, unrolling_factor = 1;
2158 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2159 slp_instance instance;
2160 int decided_to_slp = 0;
2162 if (dump_enabled_p ())
2163 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2164 "\n");
2166 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2168 /* FORNOW: SLP if you can. */
2169 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2170 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2172 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2173 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2174 loop-based vectorization. Such stmts will be marked as HYBRID. */
2175 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2176 decided_to_slp++;
2179 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2181 if (decided_to_slp && dump_enabled_p ())
2182 dump_printf_loc (MSG_NOTE, vect_location,
2183 "Decided to SLP %d instances. Unrolling factor %d\n",
2184 decided_to_slp, unrolling_factor);
2186 return (decided_to_slp > 0);
2190 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2191 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
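/* For example, if the result of an SLP computation is also consumed by a
   stmt that is only loop-vectorized (say a reduction that is not part of
   the SLP instance), the defining stmt must be vectorized both as part of
   the SLP instance and by the loop-based scheme, hence "hybrid".  */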
2193 static void
2194 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2196 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2197 imm_use_iterator imm_iter;
2198 gimple *use_stmt;
2199 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2200 slp_tree child;
2201 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2202 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2203 int j;
2205 /* Propagate hybrid down the SLP tree. */
2206 if (stype == hybrid)
2208 else if (HYBRID_SLP_STMT (stmt_vinfo))
2209 stype = hybrid;
2210 else
2212 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2213 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2214 /* If we get a pattern stmt here we have to use the LHS of the
2215 original stmt for immediate uses. */
2216 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2217 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2218 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2219 tree def;
2220 if (gimple_code (stmt) == GIMPLE_PHI)
2221 def = gimple_phi_result (stmt);
2222 else
2223 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2224 if (def)
2225 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2227 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2228 continue;
2229 use_vinfo = vinfo_for_stmt (use_stmt);
2230 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2231 && STMT_VINFO_RELATED_STMT (use_vinfo))
2232 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2233 if (!STMT_SLP_TYPE (use_vinfo)
2234 && (STMT_VINFO_RELEVANT (use_vinfo)
2235 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2236 && !(gimple_code (use_stmt) == GIMPLE_PHI
2237 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2239 if (dump_enabled_p ())
2241 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2242 "def in non-SLP stmt: ");
2243 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2245 stype = hybrid;
2250 if (stype == hybrid
2251 && !HYBRID_SLP_STMT (stmt_vinfo))
2253 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2256 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2258 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2261 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2262 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2263 vect_detect_hybrid_slp_stmts (child, i, stype);
2266 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2268 static tree
2269 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2271 walk_stmt_info *wi = (walk_stmt_info *)data;
2272 struct loop *loopp = (struct loop *)wi->info;
2274 if (wi->is_lhs)
2275 return NULL_TREE;
2277 if (TREE_CODE (*tp) == SSA_NAME
2278 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2280 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2281 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2282 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2284 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2287 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2289 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2293 return NULL_TREE;
2296 static tree
2297 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2298 walk_stmt_info *)
2300 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2301 /* If the stmt is in a SLP instance then this isn't a reason
2302 to mark use definitions in other SLP instances as hybrid. */
2303 if (! STMT_SLP_TYPE (use_vinfo)
2304 && (STMT_VINFO_RELEVANT (use_vinfo)
2305 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2306 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2307 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2309 else
2310 *handled = true;
2311 return NULL_TREE;
2314 /* Find stmts that must be both vectorized and SLPed. */
2316 void
2317 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2319 unsigned int i;
2320 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2321 slp_instance instance;
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2325 "\n");
2327 /* First walk all pattern stmts in the loop and mark the defs of their
2328 uses as hybrid, because immediate uses inside pattern stmts are not recorded. */
2329 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2331 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2332 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2333 gsi_next (&gsi))
2335 gimple *stmt = gsi_stmt (gsi);
2336 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2337 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2339 walk_stmt_info wi;
2340 memset (&wi, 0, sizeof (wi));
2341 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2342 gimple_stmt_iterator gsi2
2343 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2344 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2345 vect_detect_hybrid_slp_1, &wi);
2346 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2347 vect_detect_hybrid_slp_2,
2348 vect_detect_hybrid_slp_1, &wi);
2353 /* Then walk the SLP instance trees marking stmts with uses in
2354 non-SLP stmts as hybrid, also propagating hybrid down the
2355 SLP tree, collecting the above info on-the-fly. */
2356 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2358 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2359 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2360 i, pure_slp);
2365 /* Create and initialize a new bb_vec_info struct for BB, as well as
2366 stmt_vec_info structs for all the stmts in it. */
2368 static bb_vec_info
2369 new_bb_vec_info (gimple_stmt_iterator region_begin,
2370 gimple_stmt_iterator region_end)
2372 basic_block bb = gsi_bb (region_begin);
2373 bb_vec_info res = NULL;
2374 gimple_stmt_iterator gsi;
2376 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2377 res->kind = vec_info::bb;
2378 BB_VINFO_BB (res) = bb;
2379 res->region_begin = region_begin;
2380 res->region_end = region_end;
2382 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2383 gsi_next (&gsi))
2385 gimple *stmt = gsi_stmt (gsi);
2386 gimple_set_uid (stmt, 0);
2387 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2390 BB_VINFO_GROUPED_STORES (res).create (10);
2391 BB_VINFO_SLP_INSTANCES (res).create (2);
2392 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2394 bb->aux = res;
2395 return res;
2399 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2400 stmts in the basic block. */
2402 static void
2403 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2405 slp_instance instance;
2406 unsigned i;
2408 if (!bb_vinfo)
2409 return;
2411 vect_destroy_datarefs (bb_vinfo);
2412 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2413 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2414 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
2415 vect_free_slp_instance (instance);
2416 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2417 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2419 for (gimple_stmt_iterator si = bb_vinfo->region_begin;
2420 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
2422 gimple *stmt = gsi_stmt (si);
2423 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2425 if (stmt_info)
2426 /* Free stmt_vec_info. */
2427 free_stmt_vec_info (stmt);
2429 /* Reset region marker. */
2430 gimple_set_uid (stmt, -1);
2433 BB_VINFO_BB (bb_vinfo)->aux = NULL;
2434 free (bb_vinfo);
2438 /* Analyze statements contained in SLP tree node after recursively analyzing
2439 the subtree. Return TRUE if the operations are supported. */
2441 static bool
2442 vect_slp_analyze_node_operations (slp_tree node)
2444 bool dummy;
2445 int i, j;
2446 gimple *stmt;
2447 slp_tree child;
2449 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2450 return true;
2452 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2453 if (!vect_slp_analyze_node_operations (child))
2454 return false;
2456 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2457 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2458 gcc_assert (stmt_info);
2459 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2461 /* For BB vectorization vector types are assigned here.
2462 Memory accesses already got their vector type assigned
2463 in vect_analyze_data_refs. */
2464 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2465 if (bb_vinfo
2466 && ! STMT_VINFO_DATA_REF (stmt_info))
2468 gcc_assert (PURE_SLP_STMT (stmt_info));
2470 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2471 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE, vect_location,
2474 "get vectype for scalar type: ");
2475 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2476 dump_printf (MSG_NOTE, "\n");
2479 tree vectype = get_vectype_for_scalar_type (scalar_type);
2480 if (!vectype)
2482 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2485 "not SLPed: unsupported data-type ");
2486 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2487 scalar_type);
2488 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2490 return false;
2493 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2496 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2497 dump_printf (MSG_NOTE, "\n");
2500 gimple *sstmt;
2501 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2502 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2505 /* Push SLP node def-type to stmt operands. */
2506 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2507 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2508 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2509 = SLP_TREE_DEF_TYPE (child);
2510 bool res = vect_analyze_stmt (stmt, &dummy, node);
2511 /* Restore def-types. */
2512 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2513 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2514 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2515 = vect_internal_def;
2516 if (! res)
2517 return false;
2519 return true;
2523 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2524 operations are supported. */
2526 bool
2527 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2529 slp_instance instance;
2530 int i;
2532 if (dump_enabled_p ())
2533 dump_printf_loc (MSG_NOTE, vect_location,
2534 "=== vect_slp_analyze_operations ===\n");
2536 for (i = 0; slp_instances.iterate (i, &instance); )
2538 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2540 dump_printf_loc (MSG_NOTE, vect_location,
2541 "removing SLP instance operations starting from: ");
2542 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2543 SLP_TREE_SCALAR_STMTS
2544 (SLP_INSTANCE_TREE (instance))[0], 0);
2545 vect_free_slp_instance (instance);
2546 slp_instances.ordered_remove (i);
2548 else
2550 /* Compute the costs of the SLP instance. */
2551 vect_analyze_slp_cost (instance, data);
2552 i++;
2556 if (!slp_instances.length ())
2557 return false;
2559 return true;
2563 /* Compute the scalar cost of the SLP node NODE and its children
2564 and return it. Do not account defs that are marked in LIFE, and
2565 update LIFE according to uses of NODE. */
2567 static unsigned
2568 vect_bb_slp_scalar_cost (basic_block bb,
2569 slp_tree node, vec<bool, va_heap> *life)
2571 unsigned scalar_cost = 0;
2572 unsigned i;
2573 gimple *stmt;
2574 slp_tree child;
2576 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2578 unsigned stmt_cost;
2579 ssa_op_iter op_iter;
2580 def_operand_p def_p;
2581 stmt_vec_info stmt_info;
2583 if ((*life)[i])
2584 continue;
2586 /* If there is a non-vectorized use of the defs then the scalar
2587 stmt is kept live in which case we do not account it or any
2588 required defs in the SLP children in the scalar cost. This
2589 way we make the vectorization more costly when compared to
2590 the scalar cost. */
2591 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2593 imm_use_iterator use_iter;
2594 gimple *use_stmt;
2595 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2596 if (!is_gimple_debug (use_stmt)
2597 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2598 use_stmt)
2599 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2601 (*life)[i] = true;
2602 BREAK_FROM_IMM_USE_STMT (use_iter);
2605 if ((*life)[i])
2606 continue;
2608 /* Count scalar stmts only once. */
2609 if (gimple_visited_p (stmt))
2610 continue;
2611 gimple_set_visited (stmt, true);
2613 stmt_info = vinfo_for_stmt (stmt);
2614 if (STMT_VINFO_DATA_REF (stmt_info))
2616 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2617 stmt_cost = vect_get_stmt_cost (scalar_load);
2618 else
2619 stmt_cost = vect_get_stmt_cost (scalar_store);
2621 else
2622 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2624 scalar_cost += stmt_cost;
2627 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2628 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2629 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2631 return scalar_cost;
2634 /* Check if vectorization of the basic block is profitable. */
2636 static bool
2637 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2639 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2640 slp_instance instance;
2641 int i;
2642 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2643 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2645 /* Calculate scalar cost. */
2646 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2648 auto_vec<bool, 20> life;
2649 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2650 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2651 SLP_INSTANCE_TREE (instance),
2652 &life);
2655 /* Unset visited flag. */
2656 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2657 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2658 gimple_set_visited (gsi_stmt (gsi), false);
2660 /* Complete the target-specific cost calculation. */
2661 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2662 &vec_inside_cost, &vec_epilogue_cost);
2664 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2666 if (dump_enabled_p ())
2668 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2669 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2670 vec_inside_cost);
2671 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2672 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2673 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2676 /* Vectorization is profitable if its cost is no more than the cost of
2677 the scalar version. Note that we err on the vector side for equal cost because
2678 the cost estimate is otherwise quite pessimistic (constant uses are
2679 free on the scalar side but cost a load on the vector side for
2680 example). */
2681 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2682 return false;
2684 return true;
2687 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2688 if so and sets fatal to true if failure is independent of
2689 current_vector_size. */
2691 static bb_vec_info
2692 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2693 gimple_stmt_iterator region_end,
2694 vec<data_reference_p> datarefs, int n_stmts,
2695 bool &fatal)
2697 bb_vec_info bb_vinfo;
2698 slp_instance instance;
2699 int i;
2700 int min_vf = 2;
2702 /* The first group of checks is independent of the vector size. */
2703 fatal = true;
2705 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2707 if (dump_enabled_p ())
2708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2709 "not vectorized: too many instructions in "
2710 "basic block.\n");
2711 free_data_refs (datarefs);
2712 return NULL;
2715 bb_vinfo = new_bb_vec_info (region_begin, region_end);
2716 if (!bb_vinfo)
2717 return NULL;
2719 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2721 /* Analyze the data references. */
2723 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2725 if (dump_enabled_p ())
2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2727 "not vectorized: unhandled data-ref in basic "
2728 "block.\n");
2730 destroy_bb_vec_info (bb_vinfo);
2731 return NULL;
2734 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2736 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2738 "not vectorized: not enough data-refs in "
2739 "basic block.\n");
2741 destroy_bb_vec_info (bb_vinfo);
2742 return NULL;
2745 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2747 if (dump_enabled_p ())
2748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2749 "not vectorized: unhandled data access in "
2750 "basic block.\n");
2752 destroy_bb_vec_info (bb_vinfo);
2753 return NULL;
2756 /* If there are no grouped stores in the region there is no need
2757 to continue with pattern recog as vect_analyze_slp will fail
2758 anyway. */
2759 if (bb_vinfo->grouped_stores.is_empty ())
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763 "not vectorized: no grouped stores in "
2764 "basic block.\n");
2766 destroy_bb_vec_info (bb_vinfo);
2767 return NULL;
2770 /* While the rest of the analysis below depends on it in some way. */
2771 fatal = false;
2773 vect_pattern_recog (bb_vinfo);
2775 /* Check the SLP opportunities in the basic block, analyze and build SLP
2776 trees. */
2777 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2779 if (dump_enabled_p ())
2781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2782 "Failed to SLP the basic block.\n");
2783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2784 "not vectorized: failed to find SLP opportunities "
2785 "in basic block.\n");
2788 destroy_bb_vec_info (bb_vinfo);
2789 return NULL;
2792 /* Analyze and verify the alignment of data references and the
2793 dependence in the SLP instances. */
2794 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2796 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2797 || ! vect_slp_analyze_instance_dependence (instance))
2799 dump_printf_loc (MSG_NOTE, vect_location,
2800 "removing SLP instance operations starting from: ");
2801 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2802 SLP_TREE_SCALAR_STMTS
2803 (SLP_INSTANCE_TREE (instance))[0], 0);
2804 vect_free_slp_instance (instance);
2805 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2806 continue;
2809 /* Mark all the statements that we want to vectorize as pure SLP and
2810 relevant. */
2811 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2812 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2814 i++;
2816 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2818 destroy_bb_vec_info (bb_vinfo);
2819 return NULL;
2822 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2823 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2825 if (dump_enabled_p ())
2826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2827 "not vectorized: bad operation in basic block.\n");
2829 destroy_bb_vec_info (bb_vinfo);
2830 return NULL;
2833 /* Cost model: check if the vectorization is worthwhile. */
2834 if (!unlimited_cost_model (NULL)
2835 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2839 "not vectorized: vectorization is not "
2840 "profitable.\n");
2842 destroy_bb_vec_info (bb_vinfo);
2843 return NULL;
2846 if (dump_enabled_p ())
2847 dump_printf_loc (MSG_NOTE, vect_location,
2848 "Basic block will be vectorized using SLP\n");
2850 return bb_vinfo;
2854 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2855 true if anything in the basic-block was vectorized. */
2857 bool
2858 vect_slp_bb (basic_block bb)
2860 bb_vec_info bb_vinfo;
2861 gimple_stmt_iterator gsi;
2862 unsigned int vector_sizes;
2863 bool any_vectorized = false;
2865 if (dump_enabled_p ())
2866 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2868 /* Autodetect the first vector size we try. */
2869 current_vector_size = 0;
2870 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
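/* The hook returns a bitmask of the supported power-of-two sizes; on a
   hypothetical target supporting 16-byte and 8-byte vectors it would be
   16 | 8 == 24.  The retry logic below tries the largest remaining size
   first (1 << floor_log2 (24) == 16) and clears each size once tried, so
   the next attempt falls back to 8.  */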
2872 gsi = gsi_start_bb (bb);
2874 while (1)
2876 if (gsi_end_p (gsi))
2877 break;
2879 gimple_stmt_iterator region_begin = gsi;
2880 vec<data_reference_p> datarefs = vNULL;
2881 int insns = 0;
2883 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2885 gimple *stmt = gsi_stmt (gsi);
2886 if (is_gimple_debug (stmt))
2887 continue;
2888 insns++;
2890 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2891 vect_location = gimple_location (stmt);
2893 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2894 break;
2897 /* Skip leading unhandled stmts. */
2898 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2900 gsi_next (&gsi);
2901 continue;
2904 gimple_stmt_iterator region_end = gsi;
2906 bool vectorized = false;
2907 bool fatal = false;
2908 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2909 datarefs, insns, fatal);
2910 if (bb_vinfo
2911 && dbg_cnt (vect_slp))
2913 if (dump_enabled_p ())
2914 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2916 vect_schedule_slp (bb_vinfo);
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE, vect_location,
2920 "basic block part vectorized\n");
2922 destroy_bb_vec_info (bb_vinfo);
2924 vectorized = true;
2926 else
2927 destroy_bb_vec_info (bb_vinfo);
2929 any_vectorized |= vectorized;
2931 vector_sizes &= ~current_vector_size;
2932 if (vectorized
2933 || vector_sizes == 0
2934 || current_vector_size == 0
2935 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2936 vector sizes will fail do not bother iterating. */
2937 || fatal)
2939 if (gsi_end_p (region_end))
2940 break;
2942 /* Skip the unhandled stmt. */
2943 gsi_next (&gsi);
2945 /* And reset vector sizes. */
2946 current_vector_size = 0;
2947 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2949 else
2951 /* Try the next biggest vector size. */
2952 current_vector_size = 1 << floor_log2 (vector_sizes);
2953 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_NOTE, vect_location,
2955 "***** Re-trying analysis with "
2956 "vector size %d\n", current_vector_size);
2958 /* Start over. */
2959 gsi = region_begin;
2963 return any_vectorized;
2967 /* Return true if the vector type of the boolean constant that is the
2968 OPNUM operand in statement STMT is a boolean vector. */
2970 static bool
2971 vect_mask_constant_operand_p (gimple *stmt, int opnum)
2973 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2974 enum tree_code code = gimple_expr_code (stmt);
2975 tree op, vectype;
2976 gimple *def_stmt;
2977 enum vect_def_type dt;
2979 /* For comparisons and COND_EXPR the type is chosen depending
2980 on the other comparison operand. */
2981 if (TREE_CODE_CLASS (code) == tcc_comparison)
2983 if (opnum)
2984 op = gimple_assign_rhs1 (stmt);
2985 else
2986 op = gimple_assign_rhs2 (stmt);
2988 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2989 &dt, &vectype))
2990 gcc_unreachable ();
2992 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2995 if (code == COND_EXPR)
2997 tree cond = gimple_assign_rhs1 (stmt);
2999 if (TREE_CODE (cond) == SSA_NAME)
3000 op = cond;
3001 else if (opnum)
3002 op = TREE_OPERAND (cond, 1);
3003 else
3004 op = TREE_OPERAND (cond, 0);
3006 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3007 &dt, &vectype))
3008 gcc_unreachable ();
3010 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3013 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3017 /* For constant and loop invariant defs of SLP_NODE this function returns
3018 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3019 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3020 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3024 static void
3025 vect_get_constant_vectors (tree op, slp_tree slp_node,
3026 vec<tree> *vec_oprnds,
3027 unsigned int op_num, unsigned int number_of_vectors)
3029 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3030 gimple *stmt = stmts[0];
3031 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3032 unsigned nunits;
3033 tree vec_cst;
3034 tree *elts;
3035 unsigned j, number_of_places_left_in_vector;
3036 tree vector_type;
3037 tree vop;
3038 int group_size = stmts.length ();
3039 unsigned int vec_num, i;
3040 unsigned number_of_copies = 1;
3041 vec<tree> voprnds;
3042 voprnds.create (number_of_vectors);
3043 bool constant_p, is_store;
3044 tree neutral_op = NULL;
3045 enum tree_code code = gimple_expr_code (stmt);
3046 gimple_seq ctor_seq = NULL;
3048 /* Check if vector type is a boolean vector. */
3049 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3050 && vect_mask_constant_operand_p (stmt, op_num))
3051 vector_type
3052 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3053 else
3054 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3055 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3057 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3059 is_store = true;
3060 op = gimple_assign_rhs1 (stmt);
3062 else
3063 is_store = false;
3065 gcc_assert (op);
3067 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3068 created vectors. It is greater than 1 if unrolling is performed.
3070 For example, we have two scalar operands, s1 and s2 (e.g., group of
3071 strided accesses of size two), while NUNITS is four (i.e., four scalars
3072 of this type can be packed in a vector). The output vector will contain
3073 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3074 will be 2).
3076 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3077 containing the operands.
3079 For example, NUNITS is four as before, and the group size is 8
3080 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3081 {s5, s6, s7, s8}. */
3083 number_of_copies = nunits * number_of_vectors / group_size;
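/* E.g., in the first example above (group_size == 2, nunits == 4,
   number_of_vectors == 1) this gives 4 * 1 / 2 == 2 copies, matching
   the {s1, s2, s1, s2} vector.  */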
3085 number_of_places_left_in_vector = nunits;
3086 constant_p = true;
3087 elts = XALLOCAVEC (tree, nunits);
3088 bool place_after_defs = false;
3089 for (j = 0; j < number_of_copies; j++)
3091 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3093 if (is_store)
3094 op = gimple_assign_rhs1 (stmt);
3095 else
3097 switch (code)
3099 case COND_EXPR:
3101 tree cond = gimple_assign_rhs1 (stmt);
3102 if (TREE_CODE (cond) == SSA_NAME)
3103 op = gimple_op (stmt, op_num + 1);
3104 else if (op_num == 0 || op_num == 1)
3105 op = TREE_OPERAND (cond, op_num);
3106 else
3108 if (op_num == 2)
3109 op = gimple_assign_rhs2 (stmt);
3110 else
3111 op = gimple_assign_rhs3 (stmt);
3114 break;
3116 case CALL_EXPR:
3117 op = gimple_call_arg (stmt, op_num);
3118 break;
3120 case LSHIFT_EXPR:
3121 case RSHIFT_EXPR:
3122 case LROTATE_EXPR:
3123 case RROTATE_EXPR:
3124 op = gimple_op (stmt, op_num + 1);
3125 /* Unlike the other binary operators, shifts/rotates take an
3126 int shift count instead of an operand of the same type as
3127 the lhs, so make sure the scalar has the right type when
3128 we are dealing with vectors of
3129 long long/long/short/char. */
3130 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3131 op = fold_convert (TREE_TYPE (vector_type), op);
3132 break;
3134 default:
3135 op = gimple_op (stmt, op_num + 1);
3136 break;
3140 /* Create 'vect_ = {op0,op1,...,opn}'. */
3141 number_of_places_left_in_vector--;
3142 tree orig_op = op;
3143 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3145 if (CONSTANT_CLASS_P (op))
3147 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3149 /* Can't use VIEW_CONVERT_EXPR for booleans because
3150 of possibly different sizes of scalar value and
3151 vector element. */
3152 if (integer_zerop (op))
3153 op = build_int_cst (TREE_TYPE (vector_type), 0);
3154 else if (integer_onep (op))
3155 op = build_all_ones_cst (TREE_TYPE (vector_type));
3156 else
3157 gcc_unreachable ();
3159 else
3160 op = fold_unary (VIEW_CONVERT_EXPR,
3161 TREE_TYPE (vector_type), op);
3162 gcc_assert (op && CONSTANT_CLASS_P (op));
3164 else
3166 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3167 gimple *init_stmt;
3168 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3170 tree true_val
3171 = build_all_ones_cst (TREE_TYPE (vector_type));
3172 tree false_val
3173 = build_zero_cst (TREE_TYPE (vector_type));
3174 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3175 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3176 op, true_val,
3177 false_val);
3179 else
3181 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3182 op);
3183 init_stmt
3184 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3185 op);
3187 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3188 op = new_temp;
3191 elts[number_of_places_left_in_vector] = op;
3192 if (!CONSTANT_CLASS_P (op))
3193 constant_p = false;
3194 if (TREE_CODE (orig_op) == SSA_NAME
3195 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3196 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3197 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3198 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3199 place_after_defs = true;
3201 if (number_of_places_left_in_vector == 0)
3203 if (constant_p)
3204 vec_cst = build_vector (vector_type, elts);
3205 else
3207 vec<constructor_elt, va_gc> *v;
3208 unsigned k;
3209 vec_alloc (v, nunits);
3210 for (k = 0; k < nunits; ++k)
3211 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3212 vec_cst = build_constructor (vector_type, v);
3214 tree init;
3215 gimple_stmt_iterator gsi;
3216 if (place_after_defs)
3218 gsi = gsi_for_stmt
3219 (vect_find_last_scalar_stmt_in_slp (slp_node));
3220 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3222 else
3223 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3224 if (ctor_seq != NULL)
3226 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3227 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3228 GSI_SAME_STMT);
3229 ctor_seq = NULL;
3231 voprnds.quick_push (init);
3232 place_after_defs = false;
3233 number_of_places_left_in_vector = nunits;
3234 constant_p = true;
3239 /* Since the vectors are created in the reverse order, we should invert
3240 them. */
3241 vec_num = voprnds.length ();
3242 for (j = vec_num; j != 0; j--)
3244 vop = voprnds[j - 1];
3245 vec_oprnds->quick_push (vop);
3248 voprnds.release ();
3250 /* In case that VF is greater than the unrolling factor needed for the SLP
3251 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3252 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3253 to replicate the vectors. */
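/* For instance (hypothetical counts), if four vector defs are requested
   but only two were built above, the two are pushed again (or a neutral
   vector is pushed when NEUTRAL_OP is set) until four are available.  */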
3254 while (number_of_vectors > vec_oprnds->length ())
3256 tree neutral_vec = NULL;
3258 if (neutral_op)
3260 if (!neutral_vec)
3261 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3263 vec_oprnds->quick_push (neutral_vec);
3265 else
3267 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3268 vec_oprnds->quick_push (vop);
3274 /* Get the vectorized definitions from SLP_NODE, which contains the
3275 corresponding vectorized def-stmts. */
3277 static void
3278 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3280 tree vec_oprnd;
3281 gimple *vec_def_stmt;
3282 unsigned int i;
3284 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3286 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3288 gcc_assert (vec_def_stmt);
3289 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3290 vec_oprnd = gimple_phi_result (vec_def_stmt);
3291 else
3292 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3293 vec_oprnds->quick_push (vec_oprnd);
3298 /* Get vectorized definitions for SLP_NODE.
3299 If the scalar definitions are loop invariants or constants, collect them and
3300 call vect_get_constant_vectors() to create vector stmts.
3301 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3302 must be stored in the corresponding child of SLP_NODE, and we call
3303 vect_get_slp_vect_defs () to retrieve them. */
3305 void
3306 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3307 vec<vec<tree> > *vec_oprnds)
3309 gimple *first_stmt;
3310 int number_of_vects = 0, i;
3311 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3312 slp_tree child = NULL;
3313 vec<tree> vec_defs;
3314 tree oprnd;
3315 bool first_iteration = true;
3317 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3318 FOR_EACH_VEC_ELT (ops, i, oprnd)
3320 bool vectorized_defs = false;
3322 if (oprnd == NULL)
3324 vec_defs = vNULL;
3325 vec_defs.create (0);
3326 vec_oprnds->quick_push (vec_defs);
3327 continue;
3330 /* For each operand we check if it has vectorized definitions in a child
3331 node or we need to create them (for invariants and constants). We
3332 check if the LHS of the first stmt of the next child matches OPRND.
3333 If it does, we found the correct child. Otherwise, we call
3334 vect_get_constant_vectors (). */
3335 for (unsigned int child_index = 0;
3336 child_index < SLP_TREE_CHILDREN (slp_node).length (); child_index++)
3338 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3340 /* We have to check both pattern and original def, if available. */
3341 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3343 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3344 gimple *related
3345 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3346 tree first_def_op;
3348 if (gimple_code (first_def) == GIMPLE_PHI)
3349 first_def_op = gimple_phi_result (first_def);
3350 else
3351 first_def_op = gimple_get_lhs (first_def);
3352 if (operand_equal_p (oprnd, first_def_op, 0)
3353 || (related
3354 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3356 /* The number of vector defs is determined by the number of
3357 vector statements in the node from which we get those
3358 statements. */
3359 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3360 vectorized_defs = true;
3361 break;
3366 if (!vectorized_defs && first_iteration)
3368 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3369 /* Number of vector stmts was calculated according to LHS in
3370 vect_schedule_slp_instance (), fix it by replacing LHS with
3371 RHS, if necessary. See vect_get_smallest_scalar_type () for
3372 details. */
3373 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3374 &rhs_size_unit);
3375 if (rhs_size_unit != lhs_size_unit)
3377 number_of_vects *= rhs_size_unit;
3378 number_of_vects /= lhs_size_unit;
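/* E.g. (hypothetical types), if the LHS is a 2-byte short but the RHS
   operand is a 4-byte int, twice as many vector defs are needed for the
   operand as were computed from the LHS.  */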
3382 /* Allocate memory for vectorized defs. */
3383 vec_defs = vNULL;
3384 vec_defs.create (number_of_vects);
3386 /* For reduction defs we call vect_get_constant_vectors (), since we are
3387 looking for initial loop invariant values. */
3388 if (vectorized_defs)
3389 /* The defs are already vectorized. */
3390 vect_get_slp_vect_defs (child, &vec_defs);
3391 else
3392 /* Build vectors from scalar defs. */
3393 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3394 number_of_vects);
3396 vec_oprnds->quick_push (vec_defs);
3398 first_iteration = false;
3402 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3403 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3404 permute statements for the SLP node NODE of the SLP instance
3405 SLP_NODE_INSTANCE. */
3407 bool
3408 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3409 gimple_stmt_iterator *gsi, int vf,
3410 slp_instance slp_node_instance, bool analyze_only,
3411 unsigned *n_perms)
3413 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3415 tree mask_element_type = NULL_TREE, mask_type;
3416 int nunits, vec_index = 0;
3417 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3418 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3419 int mask_element;
3420 unsigned char *mask;
3421 machine_mode mode;
3423 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3424 return false;
3426 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3428 mode = TYPE_MODE (vectype);
3430 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3431 same size as the vector element being permuted. */
3432 mask_element_type = lang_hooks.types.type_for_mode
3433 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3434 mask_type = get_vectype_for_scalar_type (mask_element_type);
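/* E.g., for a V4SF vectype the element mode is SFmode, so the mask
   element type becomes the 32-bit integer type and MASK_TYPE the
   corresponding four-element integer vector type, assuming the target
   provides one.  */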
3435 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3436 mask = XALLOCAVEC (unsigned char, nunits);
3438 /* Initialize the vect stmts of NODE to properly insert the generated
3439 stmts later. */
3440 if (! analyze_only)
3441 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3442 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3443 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3445 /* Generate permutation masks for every NODE. Number of masks for each NODE
3446 is equal to GROUP_SIZE.
3447 E.g., we have a group of three nodes with three loads from the same
3448 location in each node, and the vector size is 4. I.e., we have an
3449 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3450 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3451 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3454 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3455 The last mask is illegal since we assume two operands for permute
3456 operation, and the mask element values can't be outside that range.
3457 Hence, the last mask must be converted into {2,5,5,5}.
3458 For the first two permutations we need the first and the second input
3459 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3460 we need the second and the third vectors: {b1,c1,a2,b2} and
3461 {c2,a3,b3,c3}. */
3463 int vect_stmts_counter = 0;
3464 int index = 0;
3465 int first_vec_index = -1;
3466 int second_vec_index = -1;
3467 bool noop_p = true;
3468 *n_perms = 0;
3470 for (int j = 0; j < vf; j++)
3472 for (int k = 0; k < group_size; k++)
3474 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3475 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3476 vec_index = i / nunits;
3477 mask_element = i % nunits;
3478 if (vec_index == first_vec_index
3479 || first_vec_index == -1)
3481 first_vec_index = vec_index;
3483 else if (vec_index == second_vec_index
3484 || second_vec_index == -1)
3486 second_vec_index = vec_index;
3487 mask_element += nunits;
3489 else
3491 if (dump_enabled_p ())
3493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3494 "permutation requires at "
3495 "least three vectors ");
3496 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3497 stmt, 0);
3499 return false;
3502 gcc_assert (mask_element >= 0
3503 && mask_element < 2 * nunits);
3504 if (mask_element != index)
3505 noop_p = false;
3506 mask[index++] = mask_element;
3508 if (index == nunits)
3510 if (! noop_p
3511 && ! can_vec_perm_p (mode, false, mask))
3513 if (dump_enabled_p ())
3515 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3516 vect_location,
3517 "unsupported vect permute { ");
3518 for (i = 0; i < nunits; ++i)
3519 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3520 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3522 return false;
3525 if (! noop_p)
3526 ++*n_perms;
3528 if (!analyze_only)
3530 tree mask_vec = NULL_TREE;
3532 if (! noop_p)
3534 tree *mask_elts = XALLOCAVEC (tree, nunits);
3535 for (int l = 0; l < nunits; ++l)
3536 mask_elts[l] = build_int_cst (mask_element_type,
3537 mask[l]);
3538 mask_vec = build_vector (mask_type, mask_elts);
3541 if (second_vec_index == -1)
3542 second_vec_index = first_vec_index;
3544 /* Generate the permute statement if necessary. */
3545 tree first_vec = dr_chain[first_vec_index];
3546 tree second_vec = dr_chain[second_vec_index];
3547 gimple *perm_stmt;
3548 if (! noop_p)
3550 tree perm_dest
3551 = vect_create_destination_var (gimple_assign_lhs (stmt),
3552 vectype);
3553 perm_dest = make_ssa_name (perm_dest);
3554 perm_stmt = gimple_build_assign (perm_dest,
3555 VEC_PERM_EXPR,
3556 first_vec, second_vec,
3557 mask_vec);
3558 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3560 else
3561 /* If mask was NULL_TREE generate the requested
3562 identity transform. */
3563 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3565 /* Store the vector statement in NODE. */
3566 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3569 index = 0;
3570 first_vec_index = -1;
3571 second_vec_index = -1;
3572 noop_p = true;
3577 return true;
3582 /* Vectorize SLP instance tree in postorder. */
3584 static bool
3585 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3586 unsigned int vectorization_factor)
3588 gimple *stmt;
3589 bool grouped_store, is_store;
3590 gimple_stmt_iterator si;
3591 stmt_vec_info stmt_info;
3592 unsigned int vec_stmts_size, nunits, group_size;
3593 tree vectype;
3594 int i, j;
3595 slp_tree child;
3597 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3598 return false;
3600 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3601 vect_schedule_slp_instance (child, instance, vectorization_factor);
3603 /* Push SLP node def-type to stmts. */
3604 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3605 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3606 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3607 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3609 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3610 stmt_info = vinfo_for_stmt (stmt);
3612 /* VECTYPE is the type of the destination. */
3613 vectype = STMT_VINFO_VECTYPE (stmt_info);
3614 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3615 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3617 /* For each SLP instance calculate number of vector stmts to be created
3618 for the scalar stmts in each node of the SLP tree. Number of vector
3619 elements in one vector iteration is the number of scalar elements in
3620 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3621 size.
3622 Unless this is a SLP reduction in which case the number of vector
3623 stmts is equal to the number of vector stmts of the children. */
3624 if (GROUP_FIRST_ELEMENT (stmt_info)
3625 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3626 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3627 else
3628 vec_stmts_size = (vectorization_factor * group_size) / nunits;
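/* For instance (hypothetical numbers), a vectorization factor of 8, a
   group of 2 scalar stmts and nunits == 4 yield (8 * 2) / 4 == 4 vector
   stmts for this node.  */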
3630 if (!SLP_TREE_VEC_STMTS (node).exists ())
3632 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3633 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3636 if (dump_enabled_p ())
3638 dump_printf_loc (MSG_NOTE, vect_location,
3639 "------>vectorizing SLP node starting from: ");
3640 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3643 /* Vectorized stmts go before the last scalar stmt which is where
3644 all uses are ready. */
3645 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3647 /* Mark the first element of the reduction chain as reduction to properly
3648 transform the node. In the analysis phase only the last element of the
3649 chain is marked as reduction. */
3650 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3651 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3653 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3654 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3657 /* Handle two-operation SLP nodes by vectorizing the group with
3658 both operations and then performing a merge. */
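/* A sketch with a hypothetical group: for the scalar stmts
   { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } code0 is PLUS_EXPR, ocode is
   MINUS_EXPR and mask becomes { 0, 1, 0, 1 }.  The group is vectorized
   once as all-plus (v0) and once as all-minus (v1), and the VEC_PERM_EXPR
   built below selects lane l from v0 where mask[l] == 0 and from v1 where
   mask[l] == 1, yielding an addsub-style result.  */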
3659 if (SLP_TREE_TWO_OPERATORS (node))
3661 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3662 enum tree_code ocode = ERROR_MARK;
3663 gimple *ostmt;
3664 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3665 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3666 if (gimple_assign_rhs_code (ostmt) != code0)
3668 mask[i] = 1;
3669 ocode = gimple_assign_rhs_code (ostmt);
3671 else
3672 mask[i] = 0;
3673 if (ocode != ERROR_MARK)
3675 vec<gimple *> v0;
3676 vec<gimple *> v1;
3677 unsigned j;
3678 tree tmask = NULL_TREE;
3679 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3680 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3681 SLP_TREE_VEC_STMTS (node).truncate (0);
3682 gimple_assign_set_rhs_code (stmt, ocode);
3683 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3684 gimple_assign_set_rhs_code (stmt, code0);
3685 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3686 SLP_TREE_VEC_STMTS (node).truncate (0);
3687 tree meltype = build_nonstandard_integer_type
3688 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3689 tree mvectype = get_same_sized_vectype (meltype, vectype);
3690 unsigned k = 0, l;
3691 for (j = 0; j < v0.length (); ++j)
3693 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3694 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3696 if (k >= group_size)
3697 k = 0;
3698 melts[l] = build_int_cst
3699 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3701 tmask = build_vector (mvectype, melts);
3703 /* ??? Not all targets support a VEC_PERM_EXPR with a
3704 constant mask that would translate to a vec_merge RTX
3705 (with their vec_perm_const_ok). We can either not
3706 vectorize in that case or let veclower do its job.
3707 Unfortunately that isn't too great and at least for
3708 plus/minus we'd eventually like to match targets
3709 vector addsub instructions. */
3710 gimple *vstmt;
3711 vstmt = gimple_build_assign (make_ssa_name (vectype),
3712 VEC_PERM_EXPR,
3713 gimple_assign_lhs (v0[j]),
3714 gimple_assign_lhs (v1[j]), tmask);
3715 vect_finish_stmt_generation (stmt, vstmt, &si);
3716 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3718 v0.release ();
3719 v1.release ();
3720 return false;
3723 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3725 /* Restore stmt def-types. */
3726 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3727 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3728 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3729 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3731 return is_store;
3734 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3735 For loop vectorization this is done in vectorizable_call, but for SLP
3736 it needs to be deferred until end of vect_schedule_slp, because multiple
3737 SLP instances may refer to the same scalar stmt. */
3739 static void
3740 vect_remove_slp_scalar_calls (slp_tree node)
3742 gimple *stmt, *new_stmt;
3743 gimple_stmt_iterator gsi;
3744 int i;
3745 slp_tree child;
3746 tree lhs;
3747 stmt_vec_info stmt_info;
3749 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3750 return;
3752 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3753 vect_remove_slp_scalar_calls (child);
3755 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3757 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3758 continue;
3759 stmt_info = vinfo_for_stmt (stmt);
3760 if (stmt_info == NULL
3761 || is_pattern_stmt_p (stmt_info)
3762 || !PURE_SLP_STMT (stmt_info))
3763 continue;
3764 lhs = gimple_call_lhs (stmt);
3765 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3766 set_vinfo_for_stmt (new_stmt, stmt_info);
3767 set_vinfo_for_stmt (stmt, NULL);
3768 STMT_VINFO_STMT (stmt_info) = new_stmt;
3769 gsi = gsi_for_stmt (stmt);
3770 gsi_replace (&gsi, new_stmt, false);
3771 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3775 /* Generate vector code for all SLP instances in the loop/basic block. */
3777 bool
3778 vect_schedule_slp (vec_info *vinfo)
3780 vec<slp_instance> slp_instances;
3781 slp_instance instance;
3782 unsigned int i, vf;
3783 bool is_store = false;
3785 slp_instances = vinfo->slp_instances;
3786 if (is_a <loop_vec_info> (vinfo))
3787 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3788 else
3789 vf = 1;
3791 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3793 /* Schedule the tree of INSTANCE. */
3794 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3795 instance, vf);
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_NOTE, vect_location,
3798 "vectorizing stmts using SLP.\n");
3801 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3803 slp_tree root = SLP_INSTANCE_TREE (instance);
3804 gimple *store;
3805 unsigned int j;
3806 gimple_stmt_iterator gsi;
3808 /* Remove scalar call stmts. Do not do this for basic-block
3809 vectorization as not all uses may be vectorized.
3810 ??? Why should this be necessary? DCE should be able to
3811 remove the stmts itself.
3812 ??? For BB vectorization we can as well remove scalar
3813 stmts starting from the SLP tree root if they have no
3814 uses. */
3815 if (is_a <loop_vec_info> (vinfo))
3816 vect_remove_slp_scalar_calls (root);
3818 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3819 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3821 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3822 break;
3824 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3825 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3826 /* Free the attached stmt_vec_info and remove the stmt. */
3827 gsi = gsi_for_stmt (store);
3828 unlink_stmt_vdef (store);
3829 gsi_remove (&gsi, true);
3830 release_defs (store);
3831 free_stmt_vec_info (store);
3835 return is_store;