/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
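
/* For example (with made-up stmt names), for the SLP group

     a0 = b0 + c0;
     a1 = b1 + c1;

   two slp_oprnd_info structures are created, one per operand position:
   the first collects the def-stmts of b0 and b1, the second those of c0
   and c1.  The first_* fields are filled in from the first stmt's defs
   and the defs of the remaining stmts are checked against them.  */
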
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
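
/* For example, in an interleaving chain of loads from a[0], a[2] and a[3]
   (so GROUP_GAP of the a[2] element is 2 and of the a[3] element is 1),
   the returned places are 0, 2 and 3 respectively: each step of the walk
   adds the GROUP_GAP of the next element, i.e. the place is the element's
   index in the gap-including access footprint.  */
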
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1; if the error could be corrected by swapping operands of the
   operation return 1; if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  commutative = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  if (i == 0 || i == 1)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
				  swapped ? !i : i);
	  else
	    oprnd = gimple_op (stmt, first_op_idx + i - 1);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	case vect_reduction_def:
	  break;

	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
			     &TREE_OPERAND (cond, 1));
	  TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  return 0;
}
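
/* As a sketch of the swapping logic above (hypothetical stmts): for the
   group

     x0 = a0 + 5;
     x1 = 5 + a1;

   the first stmt records first_dt == vect_internal_def for operand zero
   and vect_constant_def for operand one.  The second stmt mismatches on
   operand zero, but PLUS_EXPR is commutative, so its operands are re-read
   in swapped order (SWAPPED, goto again) and then match; the final
   swap_ssa_operands call canonicalizes the IL to x1 = a1 + 5.  */
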
/* Verify that the scalar stmts STMTS are isomorphic and of supported
   operation types.  Return true if they are; otherwise return false and
   indicate in *MATCHES which stmts are not isomorphic to the first one.
   If MATCHES[0] is false then this indicates that the comparison could not
   be carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* If populating the vector type requires unrolling then fail
	 before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
	  && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unrolling required "
			   "in basic block SLP\n");
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
	*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (first_cond_code != TREE_CODE (cond_expr))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
	= XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
	{
	  sel[i] = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
	}
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}
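
/* As an example of the two-operator case above, the group

     a0 = b0 + c0;
     a1 = b1 - c1;

   is accepted with first_stmt_code PLUS_EXPR and alt_stmt_code MINUS_EXPR:
   the group is later computed both as an add and as a subtract and the
   results are blended with a VEC_PERM_EXPR whose selector takes lane 0
   from the add result and lane 1 from the subtract result; that selector
   is exactly what is checked via can_vec_perm_p above.  */
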
/* Recursively build an SLP tree for the group of stmts STMTS.  Fail (and
   return NULL) if the def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise
   return the built SLP node.  */

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     unsigned int *max_nunits,
		     vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return NULL;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (vinfo,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
	{
	case 0:
	  break;
	case -1:
	  matches[0] = false;
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	case 1:
	  matches[i] = false;
	  break;
	}
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ???  This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ???  Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commuted try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j])) != 0)
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ???  Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
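
/* As a rough illustration of the recursion (hypothetical stmts), for the
   store group

     a[0] = (b[0] + c[0]) * d[0];
     a[1] = (b[1] + c[1]) * d[1];

   the stores form the root node, the multiplications one child node and
   the additions a grandchild node; the grouped loads from b, c and d
   terminate the recursion and are pushed onto LOADS.  */
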
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
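
/* For example, with GROUP_SIZE == 4 and PERMUTATION == {2, 0, 3, 1} the
   stmt at position 0 moves to position 2, the stmt at position 1 to
   position 0, and so on, following tmp_stmts[permutation[i]] = stmt.  */
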
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  sbitmap load_index;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	{
	  sbitmap_free (load_index);
	  return false;
	}
      if (bitmap_bit_p (load_index, lidx))
	{
	  sbitmap_free (load_index);
	  return false;
	}
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
	sbitmap_free (load_index);
	return false;
      }
  sbitmap_free (load_index);

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (unrolling_factor == 1
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
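
/* For example, a reduction pair that loads a[2*i + 1] into the first
   scalar stmt and a[2*i] into the second has load permutation {1, 0}.
   Because the order of reduction stmts does not matter, the scalar stmts
   of all nodes are rearranged to follow the load order, the permutation
   becomes the identity, and no runtime permute is needed.  */
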
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load
	      (node, vNULL, NULL,
	       SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}
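
/* The basic-block subchain case above covers, for example, a node whose
   loads are a[1] and a[2] out of an interleaving chain starting at a[0]:
   the recorded permutation is {1, 2}, but since the loads are consecutive
   (each GROUP_GAP is 1) the permutation can simply be dropped.  */
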
/* Find the latest scalar stmt in the SLP group represented by NODE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost, false,
			       vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
				stmt_info, 0, vect_body);
	      /* And adjust the number of loads performed.  */
	      unsigned nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
	      ncopies_for_cost
		= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
		   + nunits - 1) / nunits;
	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost, false,
				node, prologue_cost_vec, body_cost_vec);
	  return;
	}
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ???  We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
					   (TREE_TYPE (gimple_assign_rhs1 (stmt))));
	  break;
	default:;
	}
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
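
  /* For example, a group of 8 int stores with V4SI vectors (NUNITS == 4)
     inside a loop with vectorization factor 2 scales GROUP_SIZE to 16, so
     ncopies_for_cost == least_common_multiple (4, 16) / 4 == 4 vector
     stmts are costed for each node of the instance.  */
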
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}

/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2))
    = GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
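
/* For example, splitting a group of 7 consecutive stores with GROUP1_SIZE
   == 4 produces groups of 4 and 3 stmts.  The second group's GROUP_GAP
   becomes the original gap plus 4 (it skips over the first group) and the
   first group's GROUP_GAP grows by 3, so each group still advances across
   the whole original 7-element footprint per iteration.  */
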
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
	{
	  scalar_type = TREE_TYPE (DR_REF (dr));
	  vectype = get_vectype_for_scalar_type (scalar_type);
	}
      else
	{
	  gcc_assert (is_a <loop_vec_info> (vinfo));
	  vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
	}

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      unrolling_factor
	= least_common_multiple (max_nunits, group_size) / group_size;
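
      /* For example, a group of two stmts with max_nunits == 4 (V4SI)
	 gives least_common_multiple (4, 2) / 2 == 2: the instance has to
	 be unrolled twice to fill a vector.  */
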
1795 if (unrolling_factor != 1
1796 && is_a <bb_vec_info> (vinfo))
1799 if (max_nunits > group_size)
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802 "Build SLP failed: store group "
1803 "size not a multiple of the vector size "
1804 "in basic block SLP\n");
1805 vect_free_slp_tree (node);
1806 loads.release ();
1807 return false;
1809 /* Fatal mismatch. */
1810 matches[group_size/max_nunits * max_nunits] = false;
1811 vect_free_slp_tree (node);
1812 loads.release ();
1814 else
1816 /* Create a new SLP instance. */
1817 new_instance = XNEW (struct _slp_instance);
1818 SLP_INSTANCE_TREE (new_instance) = node;
1819 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1820 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1821 SLP_INSTANCE_LOADS (new_instance) = loads;
1823 /* Compute the load permutation. */
1824 slp_tree load_node;
1825 bool loads_permuted = false;
1826 FOR_EACH_VEC_ELT (loads, i, load_node)
1828 vec<unsigned> load_permutation;
1829 int j;
1830 gimple *load, *first_stmt;
1831 bool this_load_permuted = false;
1832 load_permutation.create (group_size);
1833 first_stmt = GROUP_FIRST_ELEMENT
1834 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1835 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1837 int load_place = vect_get_place_in_interleaving_chain
1838 (load, first_stmt);
1839 gcc_assert (load_place != -1);
1840 if (load_place != j)
1841 this_load_permuted = true;
1842 load_permutation.safe_push (load_place);
1844 if (!this_load_permuted
1845 /* The load requires permutation when unrolling exposes
1846 a gap either because the group is larger than the SLP
1847 group-size or because there is a gap between the groups. */
1848 && (unrolling_factor == 1
1849 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1850 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
1852 load_permutation.release ();
1853 continue;
1855 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1856 loads_permuted = true;
1859 if (loads_permuted)
1861 if (!vect_supported_load_permutation_p (new_instance))
1863 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1866 "Build SLP failed: unsupported load "
1867 "permutation ");
1868 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
1869 TDF_SLIM, stmt, 0);
1871 vect_free_slp_instance (new_instance);
1872 return false;
1876 /* If the loads and stores can be handled with load/store-lan
1877 instructions do not generate this SLP instance. */
1878 if (is_a <loop_vec_info> (vinfo)
1879 && loads_permuted
1880 && dr && vect_store_lanes_supported (vectype, group_size))
1882 slp_tree load_node;
1883 FOR_EACH_VEC_ELT (loads, i, load_node)
1885 gimple *first_stmt = GROUP_FIRST_ELEMENT
1886 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1887 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
1888 /* Use SLP for strided accesses (or if we
1889 can't load-lanes). */
1890 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1891 || ! vect_load_lanes_supported
1892 (STMT_VINFO_VECTYPE (stmt_vinfo),
1893 GROUP_SIZE (stmt_vinfo)))
1894 break;
1896 if (i == loads.length ())
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 "Built SLP cancelled: can use "
1901 "load/store-lanes\n");
1902 vect_free_slp_instance (new_instance);
1903 return false;
1907 vinfo->slp_instances.safe_push (new_instance);
1909 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_NOTE, vect_location,
1912 "Final SLP tree for instance:\n");
1913 vect_print_slp_tree (MSG_NOTE, vect_location, node);
1916 return true;
1919 else
1921 /* Failed to SLP. */
1922 /* Free the allocated memory. */
1923 scalar_stmts.release ();
1924 loads.release ();
1927 /* For basic block SLP, try to break the group up into multiples of the
1928 vector size. */
1929 if (is_a <bb_vec_info> (vinfo)
1930 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1931 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1933 /* We consider breaking the group only on VF boundaries from the existing
1934 start. */
1935 for (i = 0; i < group_size; i++)
1936 if (!matches[i]) break;
1938 if (i >= nunits && i < group_size)
1940 /* Split into two groups at the first vector boundary before i. */
1941 gcc_assert ((nunits & (nunits - 1)) == 0);
1942 unsigned group1_size = i & ~(nunits - 1);
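/* Illustrative: with nunits == 4 and the first mismatch at i == 6,
   group1_size becomes 6 & ~3 == 4, so exactly the first whole vector
   forms the new group. */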
1944 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
1945 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
1946 /* If the first non-match was in the middle of a vector,
1947 skip the rest of that vector. */
1948 if (group1_size < i)
1950 i = group1_size + nunits;
1951 if (i < group_size)
1952 rest = vect_split_slp_store_group (rest, nunits);
1954 if (i < group_size)
1955 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
1956 return res;
1958 /* Even though the first vector did not all match, we might be able to SLP
1959 (some) of the remainder. FORNOW ignore this possibility. */
1962 return false;
1966 /* Check if there are stmts in the loop that can be vectorized using SLP.
1967 Build SLP trees of packed scalar stmts if SLP is possible. */
1969 bool
1970 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
1972 unsigned int i;
1973 gimple *first_element;
1974 bool ok = false;
1976 if (dump_enabled_p ())
1977 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1979 /* Find SLP sequences starting from groups of grouped stores. */
1980 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1981 if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
1982 ok = true;
1984 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
1986 if (loop_vinfo->reduction_chains.length () > 0)
1988 /* Find SLP sequences starting from reduction chains. */
1989 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1990 if (vect_analyze_slp_instance (vinfo, first_element,
1991 max_tree_size))
1992 ok = true;
1993 else
1994 return false;
1996 /* Don't try to vectorize SLP reductions if a reduction chain was
1997 detected. */
1998 return ok;
2001 /* Find SLP sequences starting from groups of reductions. */
2002 if (loop_vinfo->reductions.length () > 1
2003 && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2004 max_tree_size))
2005 ok = true;
2008 return true;
2012 /* For each possible SLP instance decide whether to SLP it and calculate overall
2013 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2014 least one instance. */
2016 bool
2017 vect_make_slp_decision (loop_vec_info loop_vinfo)
2019 unsigned int i, unrolling_factor = 1;
2020 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2021 slp_instance instance;
2022 int decided_to_slp = 0;
2024 if (dump_enabled_p ())
2025 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2026 "\n");
2028 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2030 /* FORNOW: SLP if you can. */
2031 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2032 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2034 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2035 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2036 loop-based vectorization. Such stmts will be marked as HYBRID. */
2037 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2038 decided_to_slp++;
2041 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2043 if (decided_to_slp && dump_enabled_p ())
2044 dump_printf_loc (MSG_NOTE, vect_location,
2045 "Decided to SLP %d instances. Unrolling factor %d\n",
2046 decided_to_slp, unrolling_factor);
2048 return (decided_to_slp > 0);
2052 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2053 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
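/* For example (illustrative): if t = a + b is computed as part of an
   SLP store group but t also feeds a non-SLP reduction in the loop,
   the addition has to be code generated both as part of the SLP tree
   and by loop-based vectorization; such a stmt is marked hybrid. */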
2055 static void
2056 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2058 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2059 imm_use_iterator imm_iter;
2060 gimple *use_stmt;
2061 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2062 slp_tree child;
2063 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2064 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2065 int j;
2067 /* Propagate hybrid down the SLP tree. */
2068 if (stype == hybrid)
2070 else if (HYBRID_SLP_STMT (stmt_vinfo))
2071 stype = hybrid;
2072 else
2074 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2075 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2076 /* If we get a pattern stmt here we have to use the LHS of the
2077 original stmt for immediate uses. */
2078 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2079 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2080 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2081 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2082 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2084 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2085 continue;
2086 use_vinfo = vinfo_for_stmt (use_stmt);
2087 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2088 && STMT_VINFO_RELATED_STMT (use_vinfo))
2089 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2090 if (!STMT_SLP_TYPE (use_vinfo)
2091 && (STMT_VINFO_RELEVANT (use_vinfo)
2092 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2093 && !(gimple_code (use_stmt) == GIMPLE_PHI
2094 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2096 if (dump_enabled_p ())
2098 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2099 "def in non-SLP stmt: ");
2100 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2102 stype = hybrid;
2107 if (stype == hybrid
2108 && !HYBRID_SLP_STMT (stmt_vinfo))
2110 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2113 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2115 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2118 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2119 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2120 vect_detect_hybrid_slp_stmts (child, i, stype);
2123 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2125 static tree
2126 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2128 walk_stmt_info *wi = (walk_stmt_info *)data;
2129 struct loop *loopp = (struct loop *)wi->info;
2131 if (wi->is_lhs)
2132 return NULL_TREE;
2134 if (TREE_CODE (*tp) == SSA_NAME
2135 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2137 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2138 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2139 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2141 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2144 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2146 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2150 return NULL_TREE;
2153 static tree
2154 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2155 walk_stmt_info *)
2157 /* If the stmt is in an SLP instance then this isn't a reason
2158 to mark use definitions in other SLP instances as hybrid. */
2159 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2160 *handled = true;
2161 return NULL_TREE;
2164 /* Find stmts that must be both vectorized and SLPed. */
2166 void
2167 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2169 unsigned int i;
2170 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2171 slp_instance instance;
2173 if (dump_enabled_p ())
2174 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2175 "\n");
2177 /* First walk all pattern stmts in the loop and mark the defs of their
2178 uses as hybrid, because immediate uses in pattern stmts are not recorded. */
2179 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2181 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2182 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2183 gsi_next (&gsi))
2185 gimple *stmt = gsi_stmt (gsi);
2186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2187 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2189 walk_stmt_info wi;
2190 memset (&wi, 0, sizeof (wi));
2191 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2192 gimple_stmt_iterator gsi2
2193 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2194 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2195 vect_detect_hybrid_slp_1, &wi);
2196 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2197 vect_detect_hybrid_slp_2,
2198 vect_detect_hybrid_slp_1, &wi);
2203 /* Then walk the SLP instance trees marking stmts with uses in
2204 non-SLP stmts as hybrid, also propagating hybrid down the
2205 SLP tree, collecting the above info on-the-fly. */
2206 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2208 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2209 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2210 i, pure_slp);
2215 /* Create and initialize a new bb_vec_info struct for BB, as well as
2216 stmt_vec_info structs for all the stmts in it. */
2218 static bb_vec_info
2219 new_bb_vec_info (gimple_stmt_iterator region_begin,
2220 gimple_stmt_iterator region_end)
2222 basic_block bb = gsi_bb (region_begin);
2223 bb_vec_info res = NULL;
2224 gimple_stmt_iterator gsi;
2226 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2227 res->kind = vec_info::bb;
2228 BB_VINFO_BB (res) = bb;
2229 res->region_begin = region_begin;
2230 res->region_end = region_end;
2232 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2233 gsi_next (&gsi))
2235 gimple *stmt = gsi_stmt (gsi);
2236 gimple_set_uid (stmt, 0);
2237 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2240 BB_VINFO_GROUPED_STORES (res).create (10);
2241 BB_VINFO_SLP_INSTANCES (res).create (2);
2242 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2244 bb->aux = res;
2245 return res;
2249 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2250 stmts in the basic block. */
2252 static void
2253 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2255 slp_instance instance;
2256 unsigned i;
2258 if (!bb_vinfo)
2259 return;
2261 vect_destroy_datarefs (bb_vinfo);
2262 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2263 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2264 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
2265 vect_free_slp_instance (instance);
2266 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2267 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2269 for (gimple_stmt_iterator si = bb_vinfo->region_begin;
2270 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
2272 gimple *stmt = gsi_stmt (si);
2273 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2275 if (stmt_info)
2276 /* Free stmt_vec_info. */
2277 free_stmt_vec_info (stmt);
2279 /* Reset region marker. */
2280 gimple_set_uid (stmt, -1);
2283 BB_VINFO_BB (bb_vinfo)->aux = NULL;
2284 free (bb_vinfo);
2288 /* Analyze statements contained in SLP tree node after recursively analyzing
2289 the subtree. Return TRUE if the operations are supported. */
2291 static bool
2292 vect_slp_analyze_node_operations (slp_tree node)
2294 bool dummy;
2295 int i, j;
2296 gimple *stmt;
2297 slp_tree child;
2299 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2300 return true;
2302 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2303 if (!vect_slp_analyze_node_operations (child))
2304 return false;
2306 bool res = true;
2307 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2309 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2310 gcc_assert (stmt_info);
2311 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2313 /* Push SLP node def-type to stmt operands. */
2314 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2315 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2316 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2317 = SLP_TREE_DEF_TYPE (child);
2318 res = vect_analyze_stmt (stmt, &dummy, node);
2319 /* Restore def-types. */
2320 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2321 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2322 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2323 = vect_internal_def;
2324 if (! res)
2325 break;
2328 return res;
2332 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2333 operations are supported. */
2335 bool
2336 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2338 slp_instance instance;
2339 int i;
2341 if (dump_enabled_p ())
2342 dump_printf_loc (MSG_NOTE, vect_location,
2343 "=== vect_slp_analyze_operations ===\n");
2345 for (i = 0; slp_instances.iterate (i, &instance); )
2347 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2349 dump_printf_loc (MSG_NOTE, vect_location,
2350 "removing SLP instance operations starting from: ");
2351 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2352 SLP_TREE_SCALAR_STMTS
2353 (SLP_INSTANCE_TREE (instance))[0], 0);
2354 vect_free_slp_instance (instance);
2355 slp_instances.ordered_remove (i);
2357 else
2359 /* Compute the costs of the SLP instance. */
2360 vect_analyze_slp_cost (instance, data);
2361 i++;
2365 if (!slp_instances.length ())
2366 return false;
2368 return true;
2372 /* Compute the scalar cost of the SLP node NODE and its children
2373 and return it. Do not account defs that are marked in LIFE and
2374 update LIFE according to uses of NODE. */
2376 static unsigned
2377 vect_bb_slp_scalar_cost (basic_block bb,
2378 slp_tree node, vec<bool, va_heap> *life)
2380 unsigned scalar_cost = 0;
2381 unsigned i;
2382 gimple *stmt;
2383 slp_tree child;
2385 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2387 unsigned stmt_cost;
2388 ssa_op_iter op_iter;
2389 def_operand_p def_p;
2390 stmt_vec_info stmt_info;
2392 if ((*life)[i])
2393 continue;
2395 /* If there is a non-vectorized use of the defs then the scalar
2396 stmt is kept live, in which case we do not account it or any
2397 required defs in the SLP children in the scalar cost. This
2398 way we make the vectorization more costly when compared to
2399 the scalar cost. */
2400 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2402 imm_use_iterator use_iter;
2403 gimple *use_stmt;
2404 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2405 if (!is_gimple_debug (use_stmt)
2406 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2407 use_stmt)
2408 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2410 (*life)[i] = true;
2411 BREAK_FROM_IMM_USE_STMT (use_iter);
2414 if ((*life)[i])
2415 continue;
2417 /* Count scalar stmts only once. */
2418 if (gimple_visited_p (stmt))
2419 continue;
2420 gimple_set_visited (stmt, true);
2422 stmt_info = vinfo_for_stmt (stmt);
2423 if (STMT_VINFO_DATA_REF (stmt_info))
2425 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2426 stmt_cost = vect_get_stmt_cost (scalar_load);
2427 else
2428 stmt_cost = vect_get_stmt_cost (scalar_store);
2430 else
2431 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2433 scalar_cost += stmt_cost;
2436 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2437 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2438 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2440 return scalar_cost;
2443 /* Check if vectorization of the basic block is profitable. */
2445 static bool
2446 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2448 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2449 slp_instance instance;
2450 int i;
2451 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2452 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2454 /* Calculate scalar cost. */
2455 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2457 auto_vec<bool, 20> life;
2458 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2459 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2460 SLP_INSTANCE_TREE (instance),
2461 &life);
2464 /* Unset visited flag. */
2465 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2466 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2467 gimple_set_visited (gsi_stmt (gsi), false);
2469 /* Complete the target-specific cost calculation. */
2470 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2471 &vec_inside_cost, &vec_epilogue_cost);
2473 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2475 if (dump_enabled_p ())
2477 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2478 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2479 vec_inside_cost);
2480 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2481 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2482 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2485 /* Vectorization is profitable if its cost is less than the cost of the
2486 scalar version. Note that we err on the vector side for equal cost because
2487 the cost estimate is otherwise quite pessimistic (constant uses are
2488 free on the scalar side but cost a load on the vector side for
2489 example). */
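/* Illustrative numbers: with scalar_cost == 10, vec_inside_cost == 6
   and vec_outside_cost == 4 the costs are equal and we still
   vectorize; with vec_outside_cost == 5 we would give up. */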
2490 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2491 return false;
2493 return true;
2496 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2497 if so and sets fatal to true if failure is independent of
2498 current_vector_size. */
2500 static bb_vec_info
2501 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2502 gimple_stmt_iterator region_end,
2503 vec<data_reference_p> datarefs, int n_stmts,
2504 bool &fatal)
2506 bb_vec_info bb_vinfo;
2507 slp_instance instance;
2508 int i;
2509 int min_vf = 2;
2511 /* The first group of checks is independent of the vector size. */
2512 fatal = true;
2514 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "not vectorized: too many instructions in "
2519 "basic block.\n");
2520 free_data_refs (datarefs);
2521 return NULL;
2524 bb_vinfo = new_bb_vec_info (region_begin, region_end);
2525 if (!bb_vinfo)
2526 return NULL;
2528 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2530 /* Analyze the data references. */
2532 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 "not vectorized: unhandled data-ref in basic "
2537 "block.\n");
2539 destroy_bb_vec_info (bb_vinfo);
2540 return NULL;
2543 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2545 if (dump_enabled_p ())
2546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2547 "not vectorized: not enough data-refs in "
2548 "basic block.\n");
2550 destroy_bb_vec_info (bb_vinfo);
2551 return NULL;
2554 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2558 "not vectorized: unhandled data access in "
2559 "basic block.\n");
2561 destroy_bb_vec_info (bb_vinfo);
2562 return NULL;
2565 /* If there are no grouped stores in the region there is no need
2566 to continue with pattern recog as vect_analyze_slp will fail
2567 anyway. */
2568 if (bb_vinfo->grouped_stores.is_empty ())
2570 if (dump_enabled_p ())
2571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2572 "not vectorized: no grouped stores in "
2573 "basic block.\n");
2575 destroy_bb_vec_info (bb_vinfo);
2576 return NULL;
2579 /* The rest of the analysis below depends on the vector size, so from here on failures are not fatal. */
2580 fatal = false;
2582 vect_pattern_recog (bb_vinfo);
2584 /* Check the SLP opportunities in the basic block, analyze and build SLP
2585 trees. */
2586 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2588 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "Failed to SLP the basic block.\n");
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2593 "not vectorized: failed to find SLP opportunities "
2594 "in basic block.\n");
2597 destroy_bb_vec_info (bb_vinfo);
2598 return NULL;
2601 /* Analyze and verify the alignment of data references and the
2602 dependence in the SLP instances. */
2603 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2605 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2606 || ! vect_slp_analyze_instance_dependence (instance))
2608 dump_printf_loc (MSG_NOTE, vect_location,
2609 "removing SLP instance operations starting from: ");
2610 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2611 SLP_TREE_SCALAR_STMTS
2612 (SLP_INSTANCE_TREE (instance))[0], 0);
2613 vect_free_slp_instance (instance);
2614 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2615 continue;
2618 /* Mark all the statements that we want to vectorize as pure SLP and
2619 relevant. */
2620 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2621 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2623 i++;
2625 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2627 destroy_bb_vec_info (bb_vinfo);
2628 return NULL;
2631 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2632 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2634 if (dump_enabled_p ())
2635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2636 "not vectorized: bad operation in basic block.\n");
2638 destroy_bb_vec_info (bb_vinfo);
2639 return NULL;
2642 /* Cost model: check if the vectorization is worthwhile. */
2643 if (!unlimited_cost_model (NULL)
2644 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2646 if (dump_enabled_p ())
2647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2648 "not vectorized: vectorization is not "
2649 "profitable.\n");
2651 destroy_bb_vec_info (bb_vinfo);
2652 return NULL;
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_NOTE, vect_location,
2657 "Basic block will be vectorized using SLP\n");
2659 return bb_vinfo;
2663 /* Main entry for the BB vectorizer. Analyze and transform BB; return
2664 true if anything in the basic block was vectorized. */
2666 bool
2667 vect_slp_bb (basic_block bb)
2669 bb_vec_info bb_vinfo;
2670 gimple_stmt_iterator gsi;
2671 unsigned int vector_sizes;
2672 bool any_vectorized = false;
2674 if (dump_enabled_p ())
2675 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2677 /* Autodetect first vector size we try. */
2678 current_vector_size = 0;
2679 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
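/* VECTOR_SIZES is a bitmask of the vector byte sizes the target
   supports; e.g. a target with 32- and 16-byte vectors would yield
   32 | 16 (illustrative). */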
2681 gsi = gsi_start_bb (bb);
2683 while (1)
2685 if (gsi_end_p (gsi))
2686 break;
2688 gimple_stmt_iterator region_begin = gsi;
2689 vec<data_reference_p> datarefs = vNULL;
2690 int insns = 0;
2692 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2694 gimple *stmt = gsi_stmt (gsi);
2695 if (is_gimple_debug (stmt))
2696 continue;
2697 insns++;
2699 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2700 vect_location = gimple_location (stmt);
2702 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2703 break;
2706 /* Skip leading unhandled stmts. */
2707 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2709 gsi_next (&gsi);
2710 continue;
2713 gimple_stmt_iterator region_end = gsi;
2715 bool vectorized = false;
2716 bool fatal = false;
2717 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2718 datarefs, insns, fatal);
2719 if (bb_vinfo
2720 && dbg_cnt (vect_slp))
2722 if (dump_enabled_p ())
2723 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2725 vect_schedule_slp (bb_vinfo);
2727 if (dump_enabled_p ())
2728 dump_printf_loc (MSG_NOTE, vect_location,
2729 "basic block part vectorized\n");
2731 destroy_bb_vec_info (bb_vinfo);
2733 vectorized = true;
2735 else
2736 destroy_bb_vec_info (bb_vinfo);
2738 any_vectorized |= vectorized;
2740 vector_sizes &= ~current_vector_size;
2741 if (vectorized
2742 || vector_sizes == 0
2743 || current_vector_size == 0
2744 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2745 vector sizes will fail do not bother iterating. */
2746 || fatal)
2748 if (gsi_end_p (region_end))
2749 break;
2751 /* Skip the unhandled stmt. */
2752 gsi_next (&gsi);
2754 /* And reset vector sizes. */
2755 current_vector_size = 0;
2756 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2758 else
2760 /* Try the next biggest vector size. */
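/* 1 << floor_log2 (x) isolates the most significant bit still set,
   so the largest remaining candidate size is tried first. */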
2761 current_vector_size = 1 << floor_log2 (vector_sizes);
2762 if (dump_enabled_p ())
2763 dump_printf_loc (MSG_NOTE, vect_location,
2764 "***** Re-trying analysis with "
2765 "vector size %d\n", current_vector_size);
2767 /* Start over. */
2768 gsi = region_begin;
2772 return any_vectorized;
2776 /* Return true if the vector type of the boolean constant that is the
2777 OPNUM operand of statement STMT is a boolean vector. */
2779 static bool
2780 vect_mask_constant_operand_p (gimple *stmt, int opnum)
2782 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2783 enum tree_code code = gimple_expr_code (stmt);
2784 tree op, vectype;
2785 gimple *def_stmt;
2786 enum vect_def_type dt;
2788 /* For comparison and COND_EXPR type is chosen depending
2789 on the other comparison operand. */
2790 if (TREE_CODE_CLASS (code) == tcc_comparison)
2792 if (opnum)
2793 op = gimple_assign_rhs1 (stmt);
2794 else
2795 op = gimple_assign_rhs2 (stmt);
2797 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2798 &dt, &vectype))
2799 gcc_unreachable ();
2801 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2804 if (code == COND_EXPR)
2806 tree cond = gimple_assign_rhs1 (stmt);
2808 if (TREE_CODE (cond) == SSA_NAME)
2809 return false;
2811 if (opnum)
2812 op = TREE_OPERAND (cond, 1);
2813 else
2814 op = TREE_OPERAND (cond, 0);
2816 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2817 &dt, &vectype))
2818 gcc_unreachable ();
2820 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2823 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
2827 /* For constant and loop invariant defs of SLP_NODE this function returns
2828 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2829 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2830 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2831 REDUC_INDEX is the index of the reduction operand in the statements,
2832 or -1 if there is none. */
2834 static void
2835 vect_get_constant_vectors (tree op, slp_tree slp_node,
2836 vec<tree> *vec_oprnds,
2837 unsigned int op_num, unsigned int number_of_vectors,
2838 int reduc_index)
2840 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2841 gimple *stmt = stmts[0];
2842 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2843 unsigned nunits;
2844 tree vec_cst;
2845 tree *elts;
2846 unsigned j, number_of_places_left_in_vector;
2847 tree vector_type;
2848 tree vop;
2849 int group_size = stmts.length ();
2850 unsigned int vec_num, i;
2851 unsigned number_of_copies = 1;
2852 vec<tree> voprnds;
2853 voprnds.create (number_of_vectors);
2854 bool constant_p, is_store;
2855 tree neutral_op = NULL;
2856 enum tree_code code = gimple_expr_code (stmt);
2857 gimple *def_stmt;
2858 struct loop *loop;
2859 gimple_seq ctor_seq = NULL;
2861 /* Check if vector type is a boolean vector. */
2862 if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
2863 && vect_mask_constant_operand_p (stmt, op_num))
2864 vector_type
2865 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
2866 else
2867 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2868 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2870 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2871 && reduc_index != -1)
2873 op_num = reduc_index;
2874 op = gimple_op (stmt, op_num + 1);
2875 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2876 we need either neutral operands or the original operands. See
2877 get_initial_def_for_reduction() for details. */
2878 switch (code)
2880 case WIDEN_SUM_EXPR:
2881 case DOT_PROD_EXPR:
2882 case SAD_EXPR:
2883 case PLUS_EXPR:
2884 case MINUS_EXPR:
2885 case BIT_IOR_EXPR:
2886 case BIT_XOR_EXPR:
2887 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2888 neutral_op = build_real (TREE_TYPE (op), dconst0);
2889 else
2890 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2892 break;
2894 case MULT_EXPR:
2895 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2896 neutral_op = build_real (TREE_TYPE (op), dconst1);
2897 else
2898 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2900 break;
2902 case BIT_AND_EXPR:
2903 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2904 break;
2906 /* For MIN/MAX we don't have an easy neutral operand but
2907 the initial values can be used fine here. Only for
2908 a reduction chain do we have to force a neutral element. */
2909 case MAX_EXPR:
2910 case MIN_EXPR:
2911 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2912 neutral_op = NULL;
2913 else
2915 def_stmt = SSA_NAME_DEF_STMT (op);
2916 loop = (gimple_bb (stmt))->loop_father;
2917 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2918 loop_preheader_edge (loop));
2920 break;
2922 default:
2923 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2924 neutral_op = NULL;
2928 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2930 is_store = true;
2931 op = gimple_assign_rhs1 (stmt);
2933 else
2934 is_store = false;
2936 gcc_assert (op);
2938 if (CONSTANT_CLASS_P (op))
2939 constant_p = true;
2940 else
2941 constant_p = false;
2943 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2944 created vectors. It is greater than 1 if unrolling is performed.
2946 For example, we have two scalar operands, s1 and s2 (e.g., group of
2947 strided accesses of size two), while NUNITS is four (i.e., four scalars
2948 of this type can be packed in a vector). The output vector will contain
2949 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2950 will be 2).
2952 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2953 containing the operands.
2955 For example, NUNITS is four as before, and the group size is 8
2956 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2957 {s5, s6, s7, s8}. */
2959 number_of_copies = nunits * number_of_vectors / group_size;
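/* Illustrative: the first example above has nunits == 4,
   group_size == 2 and number_of_vectors == 1, giving
   number_of_copies == 2; the second has group_size == 8 and
   number_of_vectors == 2, giving number_of_copies == 1. */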
2961 number_of_places_left_in_vector = nunits;
2962 elts = XALLOCAVEC (tree, nunits);
2963 bool place_after_defs = false;
2964 for (j = 0; j < number_of_copies; j++)
2966 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2968 if (is_store)
2969 op = gimple_assign_rhs1 (stmt);
2970 else
2972 switch (code)
2974 case COND_EXPR:
2976 tree cond = gimple_assign_rhs1 (stmt);
2977 if (TREE_CODE (cond) == SSA_NAME)
2978 op = gimple_op (stmt, op_num + 1);
2979 else if (op_num == 0 || op_num == 1)
2980 op = TREE_OPERAND (cond, op_num);
2981 else
2983 if (op_num == 2)
2984 op = gimple_assign_rhs2 (stmt);
2985 else
2986 op = gimple_assign_rhs3 (stmt);
2989 break;
2991 case CALL_EXPR:
2992 op = gimple_call_arg (stmt, op_num);
2993 break;
2995 case LSHIFT_EXPR:
2996 case RSHIFT_EXPR:
2997 case LROTATE_EXPR:
2998 case RROTATE_EXPR:
2999 op = gimple_op (stmt, op_num + 1);
3000 /* Unlike the other binary operators, shifts/rotates take an
3001 int shift count instead of one of the same type as
3002 the lhs, so make sure the scalar is the right type if
3003 we are dealing with vectors of
3004 long long/long/short/char. */
3005 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3006 op = fold_convert (TREE_TYPE (vector_type), op);
3007 break;
3009 default:
3010 op = gimple_op (stmt, op_num + 1);
3011 break;
3015 if (reduc_index != -1)
3017 loop = (gimple_bb (stmt))->loop_father;
3018 def_stmt = SSA_NAME_DEF_STMT (op);
3020 gcc_assert (loop);
3022 /* Get the def before the loop. In reduction chain we have only
3023 one initial value. */
3024 if ((j != (number_of_copies - 1)
3025 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
3026 && i != 0))
3027 && neutral_op)
3028 op = neutral_op;
3029 else
3030 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
3031 loop_preheader_edge (loop));
3034 /* Create 'vect_ = {op0,op1,...,opn}'. */
3035 number_of_places_left_in_vector--;
3036 tree orig_op = op;
3037 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3039 if (CONSTANT_CLASS_P (op))
3041 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3043 /* Can't use VIEW_CONVERT_EXPR for booleans because
3044 of possibly different sizes of scalar value and
3045 vector element. */
3046 if (integer_zerop (op))
3047 op = build_int_cst (TREE_TYPE (vector_type), 0);
3048 else if (integer_onep (op))
3049 op = build_all_ones_cst (TREE_TYPE (vector_type));
3050 else
3051 gcc_unreachable ();
3053 else
3054 op = fold_unary (VIEW_CONVERT_EXPR,
3055 TREE_TYPE (vector_type), op);
3056 gcc_assert (op && CONSTANT_CLASS_P (op));
3058 else
3060 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3061 gimple *init_stmt;
3062 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3064 tree true_val
3065 = build_all_ones_cst (TREE_TYPE (vector_type));
3066 tree false_val
3067 = build_zero_cst (TREE_TYPE (vector_type));
3068 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3069 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3070 op, true_val,
3071 false_val);
3073 else
3075 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3076 op);
3077 init_stmt
3078 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3079 op);
3081 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3082 op = new_temp;
3085 elts[number_of_places_left_in_vector] = op;
3086 if (!CONSTANT_CLASS_P (op))
3087 constant_p = false;
3088 if (TREE_CODE (orig_op) == SSA_NAME
3089 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3090 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3091 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3092 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3093 place_after_defs = true;
3095 if (number_of_places_left_in_vector == 0)
3097 number_of_places_left_in_vector = nunits;
3099 if (constant_p)
3100 vec_cst = build_vector (vector_type, elts);
3101 else
3103 vec<constructor_elt, va_gc> *v;
3104 unsigned k;
3105 vec_alloc (v, nunits);
3106 for (k = 0; k < nunits; ++k)
3107 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3108 vec_cst = build_constructor (vector_type, v);
3110 tree init;
3111 gimple_stmt_iterator gsi;
3112 if (place_after_defs)
3114 gsi = gsi_for_stmt
3115 (vect_find_last_scalar_stmt_in_slp (slp_node));
3116 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3118 else
3119 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3120 if (ctor_seq != NULL)
3122 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3123 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3124 GSI_SAME_STMT);
3125 ctor_seq = NULL;
3127 voprnds.quick_push (init);
3128 place_after_defs = false;
3133 /* Since the vectors are created in reverse order, we have to reverse
3134 their order here. */
3135 vec_num = voprnds.length ();
3136 for (j = vec_num; j != 0; j--)
3138 vop = voprnds[j - 1];
3139 vec_oprnds->quick_push (vop);
3142 voprnds.release ();
3144 /* If VF is greater than the unrolling factor needed for the SLP
3145 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
3146 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3147 to replicate the vectors. */
3148 while (number_of_vectors > vec_oprnds->length ())
3150 tree neutral_vec = NULL;
3152 if (neutral_op)
3154 if (!neutral_vec)
3155 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3157 vec_oprnds->quick_push (neutral_vec);
3159 else
3161 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3162 vec_oprnds->quick_push (vop);
3168 /* Get vectorized definitions from SLP_NODE that contains corresponding
3169 vectorized def-stmts. */
3171 static void
3172 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3174 tree vec_oprnd;
3175 gimple *vec_def_stmt;
3176 unsigned int i;
3178 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3180 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3182 gcc_assert (vec_def_stmt);
3183 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3184 vec_oprnds->quick_push (vec_oprnd);
3189 /* Get vectorized definitions for SLP_NODE.
3190 If the scalar definitions are loop invariants or constants, collect them and
3191 call vect_get_constant_vectors() to create vector stmts.
3192 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3193 must be stored in the corresponding child of SLP_NODE, and we call
3194 vect_get_slp_vect_defs () to retrieve them. */
3196 void
3197 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3198 vec<vec<tree> > *vec_oprnds, int reduc_index)
3200 gimple *first_stmt;
3201 int number_of_vects = 0, i;
3202 unsigned int child_index = 0;
3203 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3204 slp_tree child = NULL;
3205 vec<tree> vec_defs;
3206 tree oprnd;
3207 bool vectorized_defs;
3209 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3210 FOR_EACH_VEC_ELT (ops, i, oprnd)
3212 /* For each operand we check if it has vectorized definitions in a child
3213 node or we need to create them (for invariants and constants). We
3214 check if the LHS of the first stmt of the next child matches OPRND.
3215 If it does, we found the correct child. Otherwise, we call
3216 vect_get_constant_vectors () and do not advance CHILD_INDEX, in order
3217 to check this child node again for the next operand. */
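/* E.g. (illustrative): for x = a + 1, where a is computed by the
   first child node, operand a finds its vectorized defs there,
   while vectors for the constant 1 are built by
   vect_get_constant_vectors (). */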
3218 vectorized_defs = false;
3219 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3221 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3223 /* We have to check both pattern and original def, if available. */
3224 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3226 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3227 gimple *related
3228 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3230 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
3231 || (related
3232 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3234 /* The number of vector defs is determined by the number of
3235 vector statements in the node from which we get those
3236 statements. */
3237 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3238 vectorized_defs = true;
3239 child_index++;
3242 else
3243 child_index++;
3246 if (!vectorized_defs)
3248 if (i == 0)
3250 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3251 /* Number of vector stmts was calculated according to LHS in
3252 vect_schedule_slp_instance (), fix it by replacing LHS with
3253 RHS, if necessary. See vect_get_smallest_scalar_type () for
3254 details. */
3255 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3256 &rhs_size_unit);
3257 if (rhs_size_unit != lhs_size_unit)
3259 number_of_vects *= rhs_size_unit;
3260 number_of_vects /= lhs_size_unit;
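/* Illustrative: for stores of short values computed from int
   operands, lhs_size_unit == 2 and rhs_size_unit == 4, so twice as
   many RHS vectors are needed. */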
3265 /* Allocate memory for vectorized defs. */
3266 vec_defs = vNULL;
3267 vec_defs.create (number_of_vects);
3269 /* For reduction defs we call vect_get_constant_vectors (), since we are
3270 looking for initial loop invariant values. */
3271 if (vectorized_defs && reduc_index == -1)
3272 /* The defs are already vectorized. */
3273 vect_get_slp_vect_defs (child, &vec_defs);
3274 else
3275 /* Build vectors from scalar defs. */
3276 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3277 number_of_vects, reduc_index);
3279 vec_oprnds->quick_push (vec_defs);
3281 /* For reductions, we only need initial values. */
3282 if (reduc_index != -1)
3283 return;
3288 /* Create NCOPIES permutation statements using the vector permute mask
3289 MASK and two input vectors placed in
3290 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3291 shifting by STRIDE elements of DR_CHAIN for every copy.
3292 (STRIDE is the number of vectorized stmts for NODE divided by the number of
3293 copies).
3294 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3295 the created stmts must be inserted. */
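/* Illustrative: with SLP_TREE_NUMBER_OF_VEC_STMTS == 4, an 8-element
   DR_CHAIN and NCOPIES == 2, STRIDE_OUT is 2 and STRIDE_IN is 4, so
   the second copy permutes dr_chain[first_vec_indx + 4] and
   dr_chain[second_vec_indx + 4]. */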
3297 static inline void
3298 vect_create_mask_and_perm (gimple *stmt,
3299 tree mask, int first_vec_indx, int second_vec_indx,
3300 gimple_stmt_iterator *gsi, slp_tree node,
3301 tree vectype, vec<tree> dr_chain,
3302 int ncopies, int vect_stmts_counter)
3304 tree perm_dest;
3305 gimple *perm_stmt = NULL;
3306 int i, stride_in, stride_out;
3307 tree first_vec, second_vec, data_ref;
3309 stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3310 stride_in = dr_chain.length () / ncopies;
3312 /* Initialize the vect stmts of NODE to properly insert the generated
3313 stmts later. */
3314 for (i = SLP_TREE_VEC_STMTS (node).length ();
3315 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3316 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3318 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3319 for (i = 0; i < ncopies; i++)
3321 first_vec = dr_chain[first_vec_indx];
3322 second_vec = dr_chain[second_vec_indx];
3324 /* Generate the permute statement if necessary. */
3325 if (mask)
3327 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3328 first_vec, second_vec, mask);
3329 data_ref = make_ssa_name (perm_dest, perm_stmt);
3330 gimple_set_lhs (perm_stmt, data_ref);
3331 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3333 else
3334 /* If mask was NULL_TREE generate the requested identity transform. */
3335 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3337 /* Store the vector statement in NODE. */
3338 SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
3339 = perm_stmt;
3341 first_vec_indx += stride_in;
3342 second_vec_indx += stride_in;
3347 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3348 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3349 permute statements for the SLP node NODE of the SLP instance
3350 SLP_NODE_INSTANCE. */
3352 bool
3353 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3354 gimple_stmt_iterator *gsi, int vf,
3355 slp_instance slp_node_instance, bool analyze_only)
3357 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3358 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3359 tree mask_element_type = NULL_TREE, mask_type;
3360 int nunits, vec_index = 0;
3361 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3362 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3363 int unroll_factor, mask_element, ncopies;
3364 unsigned char *mask;
3365 machine_mode mode;
3367 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3368 return false;
3370 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3372 mode = TYPE_MODE (vectype);
3374 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3375 same size as the vector element being permuted. */
3376 mask_element_type = lang_hooks.types.type_for_mode
3377 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3378 mask_type = get_vectype_for_scalar_type (mask_element_type);
3379 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3380 mask = XALLOCAVEC (unsigned char, nunits);
3381 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3383 /* The number of copies is determined by the final vectorization factor
3384 relative to the SLP_NODE_INSTANCE unrolling factor. */
3385 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3387 /* Generate permutation masks for every NODE. Number of masks for each NODE
3388 is equal to GROUP_SIZE.
3389 E.g., we have a group of three nodes with three loads from the same
3390 location in each node, and the vector size is 4. I.e., we have an
3391 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3392 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3393 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3396 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3397 The last mask is illegal since we assume two operands for a permute
3398 operation, so mask element values cannot exceed 2 * NUNITS - 1.
3399 Hence, the last mask must be converted into {2,5,5,5}.
3400 For the first two permutations we need the first and the second input
3401 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3402 we need the second and the third vectors: {b1,c1,a2,b2} and
3403 {c2,a3,b3,c3}. */
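/* Recall the VEC_PERM_EXPR semantics used here: mask element values
   0 .. nunits-1 select from the first input vector and
   nunits .. 2*nunits-1 from the second, which is why no mask element
   may reach 2*nunits. */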
3405 int vect_stmts_counter = 0;
3406 int index = 0;
3407 int first_vec_index = -1;
3408 int second_vec_index = -1;
3409 bool noop_p = true;
3411 for (int j = 0; j < unroll_factor; j++)
3413 for (int k = 0; k < group_size; k++)
3415 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3416 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3417 vec_index = i / nunits;
3418 mask_element = i % nunits;
3419 if (vec_index == first_vec_index
3420 || first_vec_index == -1)
3422 first_vec_index = vec_index;
3424 else if (vec_index == second_vec_index
3425 || second_vec_index == -1)
3427 second_vec_index = vec_index;
3428 mask_element += nunits;
3430 else
3432 if (dump_enabled_p ())
3434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3435 "permutation requires at "
3436 "least three vectors ");
3437 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3438 stmt, 0);
3440 return false;
3443 gcc_assert (mask_element >= 0
3444 && mask_element < 2 * nunits);
3445 if (mask_element != index)
3446 noop_p = false;
3447 mask[index++] = mask_element;
3449 if (index == nunits)
3451 if (! noop_p
3452 && ! can_vec_perm_p (mode, false, mask))
3454 if (dump_enabled_p ())
3456 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3457 vect_location,
3458 "unsupported vect permute { ");
3459 for (i = 0; i < nunits; ++i)
3460 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3461 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3463 return false;
3466 if (!analyze_only)
3468 tree mask_vec = NULL_TREE;
3470 if (! noop_p)
3472 tree *mask_elts = XALLOCAVEC (tree, nunits);
3473 for (int l = 0; l < nunits; ++l)
3474 mask_elts[l] = build_int_cst (mask_element_type,
3475 mask[l]);
3476 mask_vec = build_vector (mask_type, mask_elts);
3479 if (second_vec_index == -1)
3480 second_vec_index = first_vec_index;
3481 vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
3482 second_vec_index,
3483 gsi, node, vectype, dr_chain,
3484 ncopies, vect_stmts_counter++);
3487 index = 0;
3488 first_vec_index = -1;
3489 second_vec_index = -1;
3490 noop_p = true;
3495 return true;
3500 /* Vectorize SLP instance tree in postorder. */
3502 static bool
3503 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3504 unsigned int vectorization_factor)
3506 gimple *stmt;
3507 bool grouped_store, is_store;
3508 gimple_stmt_iterator si;
3509 stmt_vec_info stmt_info;
3510 unsigned int vec_stmts_size, nunits, group_size;
3511 tree vectype;
3512 int i, j;
3513 slp_tree child;
3515 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3516 return false;
3518 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3519 vect_schedule_slp_instance (child, instance, vectorization_factor);
3521 /* Push SLP node def-type to stmts. */
3522 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3523 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3524 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3525 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3527 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3528 stmt_info = vinfo_for_stmt (stmt);
3530 /* VECTYPE is the type of the destination. */
3531 vectype = STMT_VINFO_VECTYPE (stmt_info);
3532 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3533 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3535 /* For each SLP instance calculate the number of vector stmts to be created
3536 for the scalar stmts in each node of the SLP tree. The number of vector
3537 elements in one vector iteration is the number of scalar elements in
3538 one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
3539 size.
3540 Unless this is an SLP reduction, in which case the number of vector
3541 stmts is equal to the number of vector stmts of the children. */
3542 if (GROUP_FIRST_ELEMENT (stmt_info)
3543 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3544 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3545 else
3546 vec_stmts_size = (vectorization_factor * group_size) / nunits;
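/* Illustrative: with vectorization_factor == 4, group_size == 2 and
   nunits == 4 each node gets (4 * 2) / 4 == 2 vector stmts. */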
3548 if (!SLP_TREE_VEC_STMTS (node).exists ())
3550 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3551 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3554 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_NOTE,vect_location,
3557 "------>vectorizing SLP node starting from: ");
3558 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3561 /* Vectorized stmts go before the last scalar stmt which is where
3562 all uses are ready. */
3563 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3565 /* Mark the first element of the reduction chain as a reduction to properly
3566 transform the node. In the analysis phase only the last element of the
3567 chain is marked as a reduction. */
3568 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3569 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3571 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3572 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3575 /* Handle two-operation SLP nodes by vectorizing the group with
3576 both operations and then performing a merge. */
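/* E.g. (illustrative) an addsub-like group {+, -, +, -}: the group is
   vectorized once with PLUS_EXPR and once with MINUS_EXPR, and a
   VEC_PERM_EXPR with a constant mask then selects the appropriate
   lane from each result. */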
3577 if (SLP_TREE_TWO_OPERATORS (node))
3579 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3580 enum tree_code ocode = ERROR_MARK;
3581 gimple *ostmt;
3582 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3583 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3584 if (gimple_assign_rhs_code (ostmt) != code0)
3586 mask[i] = 1;
3587 ocode = gimple_assign_rhs_code (ostmt);
3589 else
3590 mask[i] = 0;
3591 if (ocode != ERROR_MARK)
3593 vec<gimple *> v0;
3594 vec<gimple *> v1;
3595 unsigned j;
3596 tree tmask = NULL_TREE;
3597 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3598 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3599 SLP_TREE_VEC_STMTS (node).truncate (0);
3600 gimple_assign_set_rhs_code (stmt, ocode);
3601 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3602 gimple_assign_set_rhs_code (stmt, code0);
3603 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3604 SLP_TREE_VEC_STMTS (node).truncate (0);
3605 tree meltype = build_nonstandard_integer_type
3606 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3607 tree mvectype = get_same_sized_vectype (meltype, vectype);
3608 unsigned k = 0, l;
3609 for (j = 0; j < v0.length (); ++j)
3611 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3612 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3614 if (k >= group_size)
3615 k = 0;
3616 melts[l] = build_int_cst
3617 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3619 tmask = build_vector (mvectype, melts);
3621 /* ??? Not all targets support a VEC_PERM_EXPR with a
3622 constant mask that would translate to a vec_merge RTX
3623 (with their vec_perm_const_ok). We can either not
3624 vectorize in that case or let veclower do its job.
3625 Unfortunately that isn't too great and at least for
3626 plus/minus we'd eventually like to match targets'
3627 vector addsub instructions. */
3628 gimple *vstmt;
3629 vstmt = gimple_build_assign (make_ssa_name (vectype),
3630 VEC_PERM_EXPR,
3631 gimple_assign_lhs (v0[j]),
3632 gimple_assign_lhs (v1[j]), tmask);
3633 vect_finish_stmt_generation (stmt, vstmt, &si);
3634 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3636 v0.release ();
3637 v1.release ();
3638 return false;
3641 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3643 /* Restore stmt def-types. */
3644 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3645 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3646 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3647 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3649 return is_store;
3652 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3653 For loop vectorization this is done in vectorizable_call, but for SLP
3654 it needs to be deferred until end of vect_schedule_slp, because multiple
3655 SLP instances may refer to the same scalar stmt. */
3657 static void
3658 vect_remove_slp_scalar_calls (slp_tree node)
3660 gimple *stmt, *new_stmt;
3661 gimple_stmt_iterator gsi;
3662 int i;
3663 slp_tree child;
3664 tree lhs;
3665 stmt_vec_info stmt_info;
3667 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3668 return;
3670 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3671 vect_remove_slp_scalar_calls (child);
3673 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3675 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3676 continue;
3677 stmt_info = vinfo_for_stmt (stmt);
3678 if (stmt_info == NULL
3679 || is_pattern_stmt_p (stmt_info)
3680 || !PURE_SLP_STMT (stmt_info))
3681 continue;
3682 lhs = gimple_call_lhs (stmt);
3683 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3684 set_vinfo_for_stmt (new_stmt, stmt_info);
3685 set_vinfo_for_stmt (stmt, NULL);
3686 STMT_VINFO_STMT (stmt_info) = new_stmt;
3687 gsi = gsi_for_stmt (stmt);
3688 gsi_replace (&gsi, new_stmt, false);
3689 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3693 /* Generate vector code for all SLP instances in the loop/basic block. */
3695 bool
3696 vect_schedule_slp (vec_info *vinfo)
3698 vec<slp_instance> slp_instances;
3699 slp_instance instance;
3700 unsigned int i, vf;
3701 bool is_store = false;
3703 slp_instances = vinfo->slp_instances;
3704 if (is_a <loop_vec_info> (vinfo))
3705 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3706 else
3707 vf = 1;
3709 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3711 /* Schedule the tree of INSTANCE. */
3712 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3713 instance, vf);
3714 if (dump_enabled_p ())
3715 dump_printf_loc (MSG_NOTE, vect_location,
3716 "vectorizing stmts using SLP.\n");
3719 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3721 slp_tree root = SLP_INSTANCE_TREE (instance);
3722 gimple *store;
3723 unsigned int j;
3724 gimple_stmt_iterator gsi;
3726 /* Remove scalar call stmts. Do not do this for basic-block
3727 vectorization as not all uses may be vectorized.
3728 ??? Why should this be necessary? DCE should be able to
3729 remove the stmts itself.
3730 ??? For BB vectorization we can as well remove scalar
3731 stmts starting from the SLP tree root if they have no
3732 uses. */
3733 if (is_a <loop_vec_info> (vinfo))
3734 vect_remove_slp_scalar_calls (root);
3736 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3737 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3739 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3740 break;
3742 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3743 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3744 /* Free the attached stmt_vec_info and remove the stmt. */
3745 gsi = gsi_for_stmt (store);
3746 unlink_stmt_vdef (store);
3747 gsi_remove (&gsi, true);
3748 release_defs (store);
3749 free_stmt_vec_info (store);
3753 return is_store;