/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
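
/* For example, for the group

     a0 = b0 + c0;
     a1 = b1 + c1;

   two oprnd_info structures are filled in: one collecting the def stmts
   of b0 and b1, the other those of c0 and c1.  */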

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
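
/* For example, for the chain of loads a[0], a[1], a[3] (where the gap
   between a[1] and a[3] gives a[3]'s stmt-info a GROUP_GAP of 2), the
   places returned are 0, 1 and 3 respectively.  */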

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  A non-zero *SWAP on entry
   indicates a swap is required for cond_expr stmts: *SWAP is 1 if STMT is a
   cond and the operands of the comparison need to be swapped; *SWAP is 2 if
   STMT is a cond and the code of the comparison needs to be inverted.  If
   this function performs any operand swap itself, *SWAP is set to a non-zero
   value on return.

   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the father node of this one, return 1; if everything
   is ok return 0.  */
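
/* For example, if in the first stmt of the group only the second operand
   is defined by a pattern stmt, but in the current stmt it is the first
   operand, swapping the operands of the (commutative) current stmt makes
   the pattern state of corresponding operands line up again; this is
   what the "goto again" retries below implement.  */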

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code than the first
         stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];
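          /* maps[0] is the identity; maps[1] (for *swap == 1) exchanges
             the two operands of the comparison; maps[2] (for *swap == 2,
             an inverted comparison code) exchanges the then and else arms
             of the cond_expr.  */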

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}

/* Check whether the scalar stmts STMTS are isomorphic to each other, need
   no data permutation, and use only supported operations.  Return true if
   so; otherwise return false and indicate in *MATCHES which stmts are not
   isomorphic to the first one.  If MATCHES[0] is false, the comparison
   could not be carried out or the stmts will never be vectorized by SLP.

   Note a COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and is isomorphic to
   the first stmt by swapping the two operands of its comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code of
   the comparison.  Taking A1 >= B1 ? X1 : Y1 as an example, it can be
   swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1.  */
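
/* Additionally, groups mixing PLUS_EXPR and MINUS_EXPR stmts, e.g.

     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];

   are accepted as "two operators" nodes: both operations are computed on
   whole vectors and the wanted lanes are blended with a VEC_PERM_EXPR,
   provided the target supports that permutation (checked at the end of
   this function).  *TWO_OPERATORS is set to true in that case.  */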

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
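      /* For example, with group_size 2 and a 4-lane vector type, if the
         alternate (MINUS_EXPR) stmts sit at the odd group positions this
         builds sel = { 0, 5, 2, 7 }: even lanes taken from the first
         (PLUS_EXPR) vector, odd lanes from the second (MINUS_EXPR) one.  */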
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if the def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise return
   the built SLP tree.  */

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Induction from different IVs is not supported.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) == vect_induction_def)
        FOR_EACH_VEC_ELT (stmts, i, stmt)
          if (stmt != stmts[0])
            return NULL;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from
                 scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
                                        grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
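
/* For example, if the loads of a reduction were built in the order
   a[1], a[0] (load permutation { 1, 0 }), the scalar stmts of every
   node can be reordered the same way, after which the loads are
   in-order and no runtime permutation is needed.  */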

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */
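
/* For example, in basic-block SLP the loads a[1], a[2] out of a group
   a[0] ... a[3] form a contiguous subchain and need no permutation at
   all; any other order has to be realizable as a vector permutation on
   the target (checked via vect_transform_slp_perm_load below).  */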

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  unsigned n_perms;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
               &n_perms))
      return false;

  return true;
}

/* Find the last scalar stmt in the SLP tree NODE; for pattern stmts the
   position of the original stmt is used.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)
           ? VMAT_STRIDED_SLP
           : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            {
              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              unsigned n_perms;
              vect_transform_slp_perm_load (node, vNULL, NULL,
                                            ncopies_for_cost, instance, true,
                                            &n_perms);
              record_stmt_cost (body_cost_vec, n_perms, vec_perm,
                                stmt_info, 0, vect_body);
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              /* And adjust the number of loads performed.  This handles
                 redundancies as well as loads that are later dead.  */
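              /* For example, with GROUP_SIZE 8, nunits 4 and a permutation
                 using only elements 0..3, just the first of the two vector
                 loads is needed and ncopies_for_cost ends up as 1 (times
                 the unrolling factor).  */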
              auto_sbitmap perm (GROUP_SIZE (stmt_info));
              bitmap_clear (perm);
              for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
                bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
              ncopies_for_cost = 0;
              bool load_seen = false;
              for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
                {
                  if (i % nunits == 0)
                    {
                      if (load_seen)
                        ncopies_for_cost++;
                      load_seen = false;
                    }
                  if (bitmap_bit_p (perm, i))
                    load_seen = true;
                }
              if (load_seen)
                ncopies_for_cost++;
              gcc_assert (ncopies_for_cost
                          <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                              + nunits - 1) / nunits);
              ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
            }
          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,
                                body_cost_vec);
          return;
        }
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
                             (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
                        CONSTANT_CLASS_P
                          (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
                        ? vector_load : vec_construct,
                        stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
         to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
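  /* For example, with V4SI vectors (NUNITS == 4) and a group of 8 stores,
     ncopies_for_cost below is lcm (4, 8) / 4 == 2.  */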
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
        {
        case DOT_PROD_EXPR:
        case SAD_EXPR:
          nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
                        (TREE_TYPE (gimple_assign_rhs1 (stmt))));
          break;
        default:;
        }
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,
                           ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}

/* Splits a group of stores, currently beginning at FIRST_STMT, into two
   groups: one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also
   containing the first GROUP1_SIZE stmts, since stores are consecutive)
   and a second containing the remainder.
   Return the first stmt in the second group.  */
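
/* For example, splitting a group of 7 stores with GROUP1_SIZE 4 leaves
   a first group of 4 stmts and a second group of 3; the second group's
   GROUP_GAP becomes the first group's original gap plus 4, and the
   first group's GROUP_GAP grows by 3, so that unrolling each group
   skips over the elements of the other.  */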

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2))
    = GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);

  return group2;
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
                           gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (is_a <loop_vec_info> (vinfo));
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
      /* Mark the first element of the reduction chain as reduction to properly
         transform the node.  In the reduction analysis phase only the last
         element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
                              &max_nunits, &loads, matches, &npermutes,
                              NULL, max_tree_size);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      unrolling_factor
        = least_common_multiple (max_nunits, group_size) / group_size;
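      /* E.g., for a group of 6 stmts whose smallest type packs 4 elements
         per vector, least_common_multiple (4, 6) == 12, so the group has
         to be unrolled 12 / 6 == 2 times before it fills whole vectors.  */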

      if (unrolling_factor != 1
          && is_a <bb_vec_info> (vinfo))
        {
          if (max_nunits > group_size)
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: store group "
                               "size not a multiple of the vector size "
                               "in basic block SLP\n");
              vect_free_slp_tree (node);
              loads.release ();
              return false;
            }
          /* Fatal mismatch.  */
          matches[group_size / max_nunits * max_nunits] = false;
          vect_free_slp_tree (node);
          loads.release ();
        }
      else
        {
          /* Create a new SLP instance.  */
          new_instance = XNEW (struct _slp_instance);
          SLP_INSTANCE_TREE (new_instance) = node;
          SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
          SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
          SLP_INSTANCE_LOADS (new_instance) = loads;

          /* Compute the load permutation.  */
          slp_tree load_node;
          bool loads_permuted = false;
          FOR_EACH_VEC_ELT (loads, i, load_node)
            {
              vec<unsigned> load_permutation;
              int j;
              gimple *load, *first_stmt;
              bool this_load_permuted = false;
              load_permutation.create (group_size);
              first_stmt = GROUP_FIRST_ELEMENT
                  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
              FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
                {
                  int load_place = vect_get_place_in_interleaving_chain
                      (load, first_stmt);
                  gcc_assert (load_place != -1);
                  if (load_place != j)
                    this_load_permuted = true;
                  load_permutation.safe_push (load_place);
                }
              if (!this_load_permuted
                  /* The load requires permutation when unrolling exposes
                     a gap either because the group is larger than the SLP
                     group-size or because there is a gap between the groups.  */
                  && (unrolling_factor == 1
                      || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
                          && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
                {
                  load_permutation.release ();
                  continue;
                }
              SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
              loads_permuted = true;
            }

          if (loads_permuted)
            {
              if (!vect_supported_load_permutation_p (new_instance))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: unsupported load "
                                       "permutation ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
                                        TDF_SLIM, stmt, 0);
                    }
                  vect_free_slp_instance (new_instance);
                  return false;
                }
            }

          /* If the loads and stores can be handled with load/store-lanes
             instructions do not generate this SLP instance.  */
          if (is_a <loop_vec_info> (vinfo)
              && loads_permuted
              && dr && vect_store_lanes_supported (vectype, group_size))
            {
              slp_tree load_node;
              FOR_EACH_VEC_ELT (loads, i, load_node)
                {
                  gimple *first_stmt = GROUP_FIRST_ELEMENT
                      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
                  stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
                  /* Use SLP for strided accesses (or if we
                     can't load-lanes).  */
                  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
                      || ! vect_load_lanes_supported
                             (STMT_VINFO_VECTYPE (stmt_vinfo),
                              GROUP_SIZE (stmt_vinfo)))
                    break;
                }
              if (i == loads.length ())
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Built SLP cancelled: can use "
                                     "load/store-lanes\n");
                  vect_free_slp_instance (new_instance);
                  return false;
                }
            }

          vinfo->slp_instances.safe_push (new_instance);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Final SLP tree for instance:\n");
              vect_print_slp_tree (MSG_NOTE, vect_location, node);
            }

          return true;
        }
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
      loads.release ();
    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  if (is_a <bb_vec_info> (vinfo)
      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* We consider breaking the group only on VF boundaries from the existing
         start.  */
      for (i = 0; i < group_size; i++)
        if (!matches[i]) break;

      if (i >= nunits && i < group_size)
        {
          /* Split into two groups at the first vector boundary before i.  */
          gcc_assert ((nunits & (nunits - 1)) == 0);
          unsigned group1_size = i & ~(nunits - 1);

          gimple *rest = vect_split_slp_store_group (stmt, group1_size);
          bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
          /* If the first non-match was in the middle of a vector,
             skip the rest of that vector.  */
          if (group1_size < i)
            {
              i = group1_size + nunits;
              if (i < group_size)
                rest = vect_split_slp_store_group (rest, nunits);
            }
          if (i < group_size)
            res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
          return res;
        }
      /* Even though the first vector did not all match, we might be able to SLP
         (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
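
/* E.g., with a store group of size 9, nunits == 4 and the first mismatch
   at i == 6, the group is split at the vector boundary below the mismatch
   (group1_size == 6 & ~3 == 4) into stmts 0-3 and 4-8, and stmts 0-3 are
   re-analyzed as their own instance.  Since the mismatch lies in the
   middle of the next vector, stmts 4-7 are then split off and skipped,
   and stmt 8 is analyzed separately.  */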

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (! vect_analyze_slp_instance (vinfo, first_element,
                                             max_tree_size))
              {
                /* Dissolve reduction chain group.  */
                gimple *next, *stmt = first_element;
                while (stmt)
                  {
                    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
                    next = GROUP_NEXT_ELEMENT (vinfo);
                    GROUP_FIRST_ELEMENT (vinfo) = NULL;
                    GROUP_NEXT_ELEMENT (vinfo) = NULL;
                    stmt = next;
                  }
                STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
                  = vect_internal_def;
              }
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
                                   max_tree_size);
    }

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
                     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d\n",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}
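
/* Note that instance unrolling factors are powers of two here (each one
   divides the vector element count), so taking their maximum is the same
   as taking their least common multiple: e.g. instances needing factors
   1, 2 and 4 give a loop-wide SLP unrolling factor of 4.  */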


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
         original stmt for immediate uses.  */
      if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
          && STMT_VINFO_RELATED_STMT (stmt_vinfo))
        stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
        def = gimple_phi_result (stmt);
      else
        def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
          {
            if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
              continue;
            use_vinfo = vinfo_for_stmt (use_stmt);
            if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
                && STMT_VINFO_RELATED_STMT (use_vinfo))
              use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
            if (!STMT_SLP_TYPE (use_vinfo)
                && (STMT_VINFO_RELEVANT (use_vinfo)
                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
                && !(gimple_code (use_stmt) == GIMPLE_PHI
                     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
                                     "def in non-SLP stmt: ");
                    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
                  }
                stype = hybrid;
              }
          }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}

/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  struct loop *loopp = (struct loop *) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
          && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
            }
          STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
        }
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
                          walk_stmt_info *)
{
  stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
          || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
            && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
                     "\n");

  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              walk_stmt_info wi;
              memset (&wi, 0, sizeof (wi));
              wi.info = LOOP_VINFO_LOOP (loop_vinfo);
              gimple_stmt_iterator gsi2
                = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
              walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
                                vect_detect_hybrid_slp_1, &wi);
              walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                               vect_detect_hybrid_slp_2,
                               vect_detect_hybrid_slp_1, &wi);
            }
        }
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
        vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
                                      i, pure_slp);
    }
}
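
/* As an illustration, if an SLP tree computes t = a + b to feed a group of
   stores, and t is also consumed by a relevant stmt outside any SLP
   instance, then the addition's def has a use in a non-SLP stmt: it is
   marked hybrid and will be code-generated both by the SLP scheduler and
   by loop-based vectorization.  */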


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (gimple_stmt_iterator region_begin,
                 gimple_stmt_iterator region_end)
{
  basic_block bb = gsi_bb (region_begin);
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  res->kind = vec_info::bb;
  BB_VINFO_BB (res) = bb;
  res->region_begin = region_begin;
  res->region_end = region_end;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  slp_instance instance;
  unsigned i;

  if (!bb_vinfo)
    return;

  vect_destroy_datarefs (bb_vinfo);
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));

  for (gimple_stmt_iterator si = bb_vinfo->region_begin;
       gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);

      /* Reset region marker.  */
      gimple_set_uid (stmt, -1);
    }

  BB_VINFO_BB (bb_vinfo)->aux = NULL;
  free (bb_vinfo);
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance)
{
  bool dummy;
  int i, j;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (child, node_instance))
      return false;

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gcc_assert (stmt_info);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      gimple *sstmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
        STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
    }

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
        = SLP_TREE_DEF_TYPE (child);
  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
        = vect_internal_def;
  if (! res)
    return false;

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_operations ===\n");

  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance),
                                             instance))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "removing SLP instance operations starting from: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                            SLP_TREE_SCALAR_STMTS
                              (SLP_INSTANCE_TREE (instance))[0], 0);
          vect_free_slp_instance (instance);
          slp_instances.ordered_remove (i);
        }
      else
        {
          /* Compute the costs of the SLP instance.  */
          vect_analyze_slp_cost (instance, data);
          i++;
        }
    }

  if (!slp_instances.length ())
    return false;

  return true;
}


/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      if ((*life)[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt)
                && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
                                             use_stmt)
                    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
              {
                (*life)[i] = true;
                BREAK_FROM_IMM_USE_STMT (use_iter);
              }
        }
      if ((*life)[i])
        continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
        continue;
      gimple_set_visited (stmt, true);

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
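
/* For example, if one scalar stmt in the node defines a value that is also
   used by a stmt outside the vectorized region (or by a non-SLP stmt
   inside it), that scalar stmt stays live after vectorization.  LIFE
   records this, the stmt's cost is not credited to the scalar side, and
   the comparison is thereby biased against vectorizing.  */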

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
                                              SLP_INSTANCE_TREE (instance),
                                              &life);
    }

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is no more than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
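
/* E.g., with a vector inside cost of 4, a prologue cost of 2 and no
   epilogue cost against a scalar cost of 6, the total vector cost
   4 + 2 == 6 does not exceed the scalar cost, so the block is vectorized;
   one extra prologue load (total 7) would tip the decision the other
   way.  */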

/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
                       gimple_stmt_iterator region_end,
                       vec<data_reference_p> datarefs, int n_stmts,
                       bool &fatal)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  int min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: too many instructions in "
                         "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new_bb_vec_info (region_begin, region_end);
  if (!bb_vinfo)
    return NULL;

  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: no grouped stores in "
                         "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* The rest of the analysis below depends on the vector size in some way,
     so from here on failures are no longer fatal for other vector sizes.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Failed to SLP the basic block.\n");
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: failed to find SLP opportunities "
                           "in basic block.\n");
        }

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
          || ! vect_slp_analyze_instance_dependence (instance))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "removing SLP instance operations starting from: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                            SLP_TREE_SCALAR_STMTS
                              (SLP_INSTANCE_TREE (instance))[0], 0);
          vect_free_slp_instance (instance);
          BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
          continue;
        }

      /* Mark all the statements that we want to vectorize as pure SLP and
         relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
                                    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}


/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;
  bool any_vectorized = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  gsi = gsi_start_bb (bb);

  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = gimple_location (stmt);

          if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          vect_schedule_slp (bb_vinfo);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "basic block part vectorized\n");

          destroy_bb_vec_info (bb_vinfo);

          vectorized = true;
        }
      else
        destroy_bb_vec_info (bb_vinfo);

      any_vectorized |= vectorized;

      vector_sizes &= ~current_vector_size;
      if (vectorized
          || vector_sizes == 0
          || current_vector_size == 0
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = 1 << floor_log2 (vector_sizes);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "***** Re-trying analysis with "
                             "vector size %d\n", current_vector_size);

          /* Start over.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}
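
/* E.g., on a target advertising vector sizes 16 and 8, the first attempt
   lets the target autodetect its preferred size (current_vector_size == 0).
   If that picks 16 and analysis fails non-fatally, 16 is removed from
   vector_sizes and the region is re-analyzed with the largest remaining
   size, 8; once no sizes remain, the region is given up on.  */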


/* Return true if the vector type of the boolean constant that is the OPNUM
   operand of statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparisons and COND_EXPR the type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
        op = gimple_assign_rhs1 (stmt);
      else
        op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        op = cond;
      else if (opnum)
        op = TREE_OPERAND (cond, 1);
      else
        op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  elts = XALLOCAVEC (tree, nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                  case COND_EXPR:
                    {
                      tree cond = gimple_assign_rhs1 (stmt);
                      if (TREE_CODE (cond) == SSA_NAME)
                        op = gimple_op (stmt, op_num + 1);
                      else if (op_num == 0 || op_num == 1)
                        op = TREE_OPERAND (cond, op_num);
                      else
                        {
                          if (op_num == 2)
                            op = gimple_assign_rhs2 (stmt);
                          else
                            op = gimple_assign_rhs3 (stmt);
                        }
                    }
                    break;

                  case CALL_EXPR:
                    op = gimple_call_arg (stmt, op_num);
                    break;

                  case LSHIFT_EXPR:
                  case RSHIFT_EXPR:
                  case LROTATE_EXPR:
                  case RROTATE_EXPR:
                    op = gimple_op (stmt, op_num + 1);
                    /* Unlike the other binary operators, shifts/rotates have
                       the shift count being int, instead of the same type as
                       the lhs, so make sure the scalar is the right type if
                       we are dealing with vectors of
                       long long/long/short/char.  */
                    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                      op = fold_convert (TREE_TYPE (vector_type), op);
                    break;

                  default:
                    op = gimple_op (stmt, op_num + 1);
                    break;
                }
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p)
                vec_cst = build_vector (vector_type, elts);
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  gsi = gsi_for_stmt
                      (vect_find_last_scalar_stmt_in_slp (slp_node));
                  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
                }
              else
                init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}


/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
        vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool first_iteration = true;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      bool vectorized_defs = false;

      if (oprnd == NULL)
        {
          vec_defs = vNULL;
          vec_defs.create (0);
          vec_oprnds->quick_push (vec_defs);
          continue;
        }

      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors ().  */
      for (unsigned int child_index = 0;
           child_index < SLP_TREE_CHILDREN (slp_node).length (); child_index++)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
              tree first_def_op;

              if (gimple_code (first_def) == GIMPLE_PHI)
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  break;
                }
            }
        }

      if (!vectorized_defs && first_iteration)
        {
          number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          /* Number of vector stmts was calculated according to LHS in
             vect_schedule_slp_instance (), fix it by replacing LHS with
             RHS, if necessary.  See vect_get_smallest_scalar_type () for
             details.  */
          vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                         &rhs_size_unit);
          if (rhs_size_unit != lhs_size_unit)
            {
              number_of_vects *= rhs_size_unit;
              number_of_vects /= lhs_size_unit;
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);

      first_iteration = false;
    }
}

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  unsigned char *mask;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
      (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (int j = 0; j < vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits)
            {
              if (! noop_p
                  && ! can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  return false;
                }

              if (! noop_p)
                ++*n_perms;

              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    {
                      tree *mask_elts = XALLOCAVEC (tree, nunits);
                      for (int l = 0; l < nunits; ++l)
                        mask_elts[l] = build_int_cst (mask_element_type,
                                                      mask[l]);
                      mask_vec = build_vector (mask_type, mask_elts);
                    }

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}
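
/* E.g., for a load permutation {1, 0}, an SLP group size of 2, vf == 4 and
   nunits == 4, the scalar lanes accessed are i = perm[k] + j * 2, i.e.
   1,0,3,2, 5,4,7,6.  The first four all lie in vector 0, giving the mask
   {1,0,3,2}; the next four lie in vector 1 at the same positions, so the
   same mask is applied to dr_chain[1].  */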


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.
     Unless this is a SLP reduction in which case the number of vector
     stmts is equal to the number of vector stmts of the children.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    vec_stmts_size = (vectorization_factor * group_size) / nunits;

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask[i] = 1;
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask[i] = 0;
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
              for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  melts[l] = build_int_cst
                      (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
                }
              tmask = build_vector (mvectype, melts);

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
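
/* E.g., for a two-operator group {a0 + b0, a1 - b1, a2 + b2, a3 - b3} with
   nunits == 4 the operator mask is {0,1,0,1}: the node is vectorized once
   as an addition (v0) and once as a subtraction (v1), and the merge mask
   computed above is {0,5,2,7}, i.e. the VEC_PERM_EXPR takes lanes 0 and 2
   from v0 and lanes 1 and 3 from v1 -- an addsub pattern.  */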

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  if (is_a <loop_vec_info> (vinfo))
    vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
  else
    vf = 1;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}