/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
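      /* For a COND_EXPR whose first rhs operand is an embedded comparison,
         the two comparison operands and the two arms each get an operand
         slot (see the matching number_of_oprnds++ in
         vect_get_and_check_slp_defs), hence one more than the rhs operand
         count suggests.  */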
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
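/* For example, for a chain accessing a[0], a[1], a[3] (where the element
   for a[3] has DR_GROUP_GAP 2 because of the hole at a[2]), the places
   returned are 0, 1 and 3 respectively.  */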
int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
        {
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
            {
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
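              /* For example, with nelts == 4 the two selectors built below
                 are {0, 4, 1, 5} and {2, 6, 3, 7}: the first interleaves the
                 low halves of the two input vectors, the second the high
                 halves.  */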
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to a non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<gimple *> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
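          /* Row 0 is the identity; row 1 (*swap == 1) exchanges the two
             comparison operands; row 2 (*swap == 2) exchanges the two arms
             of the COND_EXPR (gimple operands 2 and 3).  */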
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }
      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
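          /* For variable-length vectors a constant or external def would
             have to be built from GROUP_SIZE scalars by
             duplicate_and_interleave; reject element types for which that
             strategy is unavailable (booleans are rejected outright).  */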
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !current_vector_size.is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (stmts.length (),
                                                      TYPE_MODE (type))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: invalid type of def "
                                   "for variable-length SLP ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return -1;
            }
        }
      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }
  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */
static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
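/* For example, for the group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } with a
   four-element VECTYPE, the selector built below is {0, 5, 2, 7}: even
   lanes are taken from the PLUS vector and odd lanes from the MINUS
   vector.  */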
static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
                               tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }

  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, poly_uint64 *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;
  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype)
          || (nunits_vectype
              && !vect_record_max_nunits (vinfo, stmt, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);
      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);
      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              if (vectype == boolean_type_node)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: shift of a"
                                     " boolean.\n");
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
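          /* A single alternative operation is tolerated below; e.g. the
             group { a0 + b0, a1 - b1, ... } is accepted and vectorized as
             one PLUS vector and one MINUS vector that are blended
             afterwards (see the SLP_TREE_TWO_OPERATORS handling at the end
             of this function).  */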
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }
          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }
      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }
      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
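/* Set of scalar-stmt vectors for which SLP build has already failed;
   consulted by vect_build_slp_tree below so failed subtrees are not
   re-analyzed (see PR81723).  */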
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);
static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits, vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return the built SLP tree; *MATCHES indicates which stmts failed to
   match the first one.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;
  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }
  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }
  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;
  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }
      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }
      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          /* Do so only if the number of not successful permutes was not
             more than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
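          /* First try to swap the operands of just the mismatched stmts;
             if one of those cannot be swapped, restart and swap the
             operands of the matching stmts instead.  */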
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  gimple *stmt = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  if (!is_gimple_assign (stmt)
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                  /* Verify if we can safely swap or if we committed to a
                     specific operand order already.
                     ??? Instead of modifying GIMPLE stmts here we could
                     record whether we want to swap operands in the SLP
                     node and temporarily do that when processing it
                     (or wrap operand accessors in a helper).  */
                  else if (swap[j] != 0
                           || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
                    {
                      if (!swap_not_matching)
                        {
                          if (dump_enabled_p ())
                            {
                              dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                               vect_location,
                                               "Build SLP failed: cannot swap "
                                               "operands of shared stmt ");
                              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
                                                TDF_SLIM, stmts[j], 0);
                            }
                          goto fail;
                        }
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);
          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (matches[j] == !swap_not_matching)
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1)
                                == (matches[j] == !swap_not_matching));
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
                                        grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }
fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
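/* For example, if every load node of the instance carries the same
   permutation {1, 0, 3, 2}, the scalar stmts of all nodes are reordered
   by it and the load permutations can then be dropped entirely.  */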
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }
  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);
  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
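      /* A subchain is a run of consecutive elements anywhere inside the
         interleaving chain, e.g. a node loading a[2], a[3] out of a chain
         starting at a[0]; such a node needs no permutation at all.  */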
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
              unsigned HOST_WIDE_INT nunits;
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of DR_GROUP_SIZE.  */
              tree vectype = STMT_VINFO_VECTYPE (group_info);
              if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
                  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }
  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                               (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last scalar stmt in the SLP tree NODE, e.g. the last store of
   an SLP instance rooted at a group of stores.  Pattern stmts are counted
   at the position of their original stmt.  */
gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_STMT, into two
   groups: one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also
   containing the first GROUP1_SIZE stmts, since stores are consecutive),
   the second containing the remainder.
   Return the first stmt in the second group.  */
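/* For example, splitting a group of 8 consecutive stores with GROUP1_SIZE 4
   leaves elements 0-3 in the first group and puts elements 4-7 in the
   second; the second group's DR_GROUP_GAP becomes the original group's gap
   plus 4 (it must skip over the first group), and the first group's gap
   grows by 4 (it must skip over the second).  */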
static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt;
       stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (vinfo_for_stmt (group2))
    = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */
static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
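/* For example, a group of 2 stmts with 4-element vectors gives
   common_multiple (4, 2) / 2 == 2: the group must be unrolled twice to
   fill a vector.  A group of 6 with 4-element vectors gives 12 / 6 == 2.  */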
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */
static bool
vect_analyze_slp_instance (vec_info *vinfo,
                           gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1935 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1936 scalar_stmts.create (group_size);
1937 next = stmt;
1938 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1940 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1941 while (next)
1943 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1944 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1945 scalar_stmts.safe_push (
1946 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1947 else
1948 scalar_stmts.safe_push (next);
1949 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1952 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1954 /* Collect the reduction stmts and store them in
1955 SLP_TREE_SCALAR_STMTS. */
1956 while (next)
1958 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1959 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1960 scalar_stmts.safe_push (
1961 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1962 else
1963 scalar_stmts.safe_push (next);
1964 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1966 /* Mark the first element of the reduction chain as reduction to properly
1967 transform the node. In the reduction analysis phase only the last
1968 element of the chain is marked as reduction. */
1969 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1971 else
1973 /* Collect reduction statements. */
1974 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1975 for (i = 0; reductions.iterate (i, &next); i++)
1976 scalar_stmts.safe_push (next);
1979 loads.create (group_size);
1981 /* Build the tree for the SLP instance. */
1982 bool *matches = XALLOCAVEC (bool, group_size);
1983 unsigned npermutes = 0;
1984 bst_fail = new scalar_stmts_set_t ();
1985 poly_uint64 max_nunits = nunits;
1986 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1987 &max_nunits, &loads, matches, &npermutes,
1988 NULL, max_tree_size);
1989 delete bst_fail;
1990 if (node != NULL)
1992 /* Calculate the unrolling factor based on the smallest type. */
1993 poly_uint64 unrolling_factor
1994 = calculate_unrolling_factor (max_nunits, group_size);
1996 if (maybe_ne (unrolling_factor, 1U)
1997 && is_a <bb_vec_info> (vinfo))
1999 unsigned HOST_WIDE_INT const_max_nunits;
2000 if (!max_nunits.is_constant (&const_max_nunits)
2001 || const_max_nunits > group_size)
2003 if (dump_enabled_p ())
2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2005 "Build SLP failed: store group "
2006 "size not a multiple of the vector size "
2007 "in basic block SLP\n");
2008 vect_free_slp_tree (node);
2009 loads.release ();
2010 return false;
2012 /* Fatal mismatch. */
2013 matches[group_size / const_max_nunits * const_max_nunits] = false;
2014 vect_free_slp_tree (node);
2015 loads.release ();
2017 else
2019 /* Create a new SLP instance. */
2020 new_instance = XNEW (struct _slp_instance);
2021 SLP_INSTANCE_TREE (new_instance) = node;
2022 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2023 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2024 SLP_INSTANCE_LOADS (new_instance) = loads;
2026 /* Compute the load permutation. */
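/* For example (hypothetical): if this SLP node loads b[1] and b[0]
   from a group whose first element is b[0], the permutation recorded
   below is {1, 0} and loads_permuted becomes true.  */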
2027 slp_tree load_node;
2028 bool loads_permuted = false;
2029 FOR_EACH_VEC_ELT (loads, i, load_node)
2031 vec<unsigned> load_permutation;
2032 int j;
2033 gimple *load, *first_stmt;
2034 bool this_load_permuted = false;
2035 load_permutation.create (group_size);
2036 first_stmt = DR_GROUP_FIRST_ELEMENT
2037 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2040 int load_place = vect_get_place_in_interleaving_chain
2041 (load, first_stmt);
2042 gcc_assert (load_place != -1);
2043 if (load_place != j)
2044 this_load_permuted = true;
2045 load_permutation.safe_push (load_place);
2047 if (!this_load_permuted
2048 /* The load requires permutation when unrolling exposes
2049 a gap either because the group is larger than the SLP
2050 group-size or because there is a gap between the groups. */
2051 && (known_eq (unrolling_factor, 1U)
2052 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2053 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2055 load_permutation.release ();
2056 continue;
2058 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2059 loads_permuted = true;
2062 if (loads_permuted)
2064 if (!vect_supported_load_permutation_p (new_instance))
2066 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "Build SLP failed: unsupported load "
2070 "permutation ");
2071 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2072 TDF_SLIM, stmt, 0);
2074 vect_free_slp_instance (new_instance);
2075 return false;
2079 /* If the loads and stores can be handled with load/store-lanes
2080 instructions do not generate this SLP instance. */
2081 if (is_a <loop_vec_info> (vinfo)
2082 && loads_permuted
2083 && dr && vect_store_lanes_supported (vectype, group_size, false))
2085 slp_tree load_node;
2086 FOR_EACH_VEC_ELT (loads, i, load_node)
2088 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
2089 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2090 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2091 /* Use SLP for strided accesses (or if we
2092 can't use load-lanes). */
2093 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2094 || ! vect_load_lanes_supported
2095 (STMT_VINFO_VECTYPE (stmt_vinfo),
2096 DR_GROUP_SIZE (stmt_vinfo), false))
2097 break;
2099 if (i == loads.length ())
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2103 "Built SLP cancelled: can use "
2104 "load/store-lanes\n");
2105 vect_free_slp_instance (new_instance);
2106 return false;
2110 vinfo->slp_instances.safe_push (new_instance);
2112 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_NOTE, vect_location,
2115 "Final SLP tree for instance:\n");
2116 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2119 return true;
2122 else
2124 /* Failed to SLP. */
2125 /* Free the allocated memory. */
2126 scalar_stmts.release ();
2127 loads.release ();
2130 /* For basic block SLP, try to break the group up into multiples of the
2131 vector size. */
2132 unsigned HOST_WIDE_INT const_nunits;
2133 if (is_a <bb_vec_info> (vinfo)
2134 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2135 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2136 && nunits.is_constant (&const_nunits))
2138 /* We consider breaking the group only on VF boundaries from the existing
2139 start. */
2140 for (i = 0; i < group_size; i++)
2141 if (!matches[i]) break;
2143 if (i >= const_nunits && i < group_size)
2145 /* Split into two groups at the first vector boundary before i. */
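/* For instance (hypothetical): with const_nunits == 4 and the first
   mismatch at i == 6, the mask below gives group1_size == 4, i.e. the
   split happens at the vector boundary preceding the mismatch.  */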
2146 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2147 unsigned group1_size = i & ~(const_nunits - 1);
2149 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2150 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2151 /* If the first non-match was in the middle of a vector,
2152 skip the rest of that vector. */
2153 if (group1_size < i)
2155 i = group1_size + const_nunits;
2156 if (i < group_size)
2157 rest = vect_split_slp_store_group (rest, const_nunits);
2159 if (i < group_size)
2160 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2161 return res;
2163 /* Even though the first vector did not match entirely, we might still be
2164 able to SLP (some of) the remainder. FORNOW ignore this possibility. */
2167 return false;
2171 /* Check if there are stmts in the loop that can be vectorized using SLP.
2172 Build SLP trees of packed scalar stmts if SLP is possible. */
2174 bool
2175 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2177 unsigned int i;
2178 gimple *first_element;
2180 if (dump_enabled_p ())
2181 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2183 /* Find SLP sequences starting from groups of grouped stores. */
2184 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2185 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2187 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2189 if (loop_vinfo->reduction_chains.length () > 0)
2191 /* Find SLP sequences starting from reduction chains. */
2192 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2193 if (! vect_analyze_slp_instance (vinfo, first_element,
2194 max_tree_size))
2196 /* Dissolve reduction chain group. */
2197 gimple *next, *stmt = first_element;
2198 while (stmt)
2200 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2201 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2202 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2203 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2204 stmt = next;
2206 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2207 = vect_internal_def;
2211 /* Find SLP sequences starting from groups of reductions. */
2212 if (loop_vinfo->reductions.length () > 1)
2213 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2214 max_tree_size);
2217 return true;
2221 /* For each possible SLP instance decide whether to SLP it and calculate the
2222 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2223 to SLP at least one instance. */
2225 bool
2226 vect_make_slp_decision (loop_vec_info loop_vinfo)
2228 unsigned int i;
2229 poly_uint64 unrolling_factor = 1;
2230 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2231 slp_instance instance;
2232 int decided_to_slp = 0;
2234 if (dump_enabled_p ())
2235 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2236 "\n");
2238 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2240 /* FORNOW: SLP if you can. */
2241 /* All unroll factors have the form current_vector_size * X for some
2242 rational X, so they must have a common multiple. */
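/* E.g. (hypothetical): combining instances with unrolling factors 2
   and 3 forces an overall factor of 6, their least common multiple.  */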
2243 unrolling_factor
2244 = force_common_multiple (unrolling_factor,
2245 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2247 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2248 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2249 loop-based vectorization. Such stmts will be marked as HYBRID. */
2250 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2251 decided_to_slp++;
2254 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2256 if (decided_to_slp && dump_enabled_p ())
2258 dump_printf_loc (MSG_NOTE, vect_location,
2259 "Decided to SLP %d instances. Unrolling factor ",
2260 decided_to_slp);
2261 dump_dec (MSG_NOTE, unrolling_factor);
2262 dump_printf (MSG_NOTE, "\n");
2265 return (decided_to_slp > 0);
2269 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2270 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
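/* E.g. (hypothetical): if a pure-SLP stmt computes t = a + b and t is
   also consumed by a relevant non-SLP stmt in the loop, t's stmt is
   marked hybrid: it must additionally be handled by loop-based
   vectorization, and the marking is propagated down its SLP subtree.  */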
2272 static void
2273 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2275 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2276 imm_use_iterator imm_iter;
2277 gimple *use_stmt;
2278 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2279 slp_tree child;
2280 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2282 int j;
2284 /* Propagate hybrid down the SLP tree. */
2285 if (stype == hybrid)
2287 else if (HYBRID_SLP_STMT (stmt_vinfo))
2288 stype = hybrid;
2289 else
2291 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2292 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2293 /* If we get a pattern stmt here we have to use the LHS of the
2294 original stmt for immediate uses. */
2295 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2296 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2297 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2298 tree def;
2299 if (gimple_code (stmt) == GIMPLE_PHI)
2300 def = gimple_phi_result (stmt);
2301 else
2302 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2303 if (def)
2304 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2306 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2307 continue;
2308 use_vinfo = vinfo_for_stmt (use_stmt);
2309 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2310 && STMT_VINFO_RELATED_STMT (use_vinfo))
2311 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2312 if (!STMT_SLP_TYPE (use_vinfo)
2313 && (STMT_VINFO_RELEVANT (use_vinfo)
2314 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2315 && !(gimple_code (use_stmt) == GIMPLE_PHI
2316 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2318 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2321 "def in non-SLP stmt: ");
2322 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2324 stype = hybrid;
2329 if (stype == hybrid
2330 && !HYBRID_SLP_STMT (stmt_vinfo))
2332 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2335 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2337 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2340 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2341 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2342 vect_detect_hybrid_slp_stmts (child, i, stype);
2345 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2347 static tree
2348 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2350 walk_stmt_info *wi = (walk_stmt_info *)data;
2351 struct loop *loopp = (struct loop *)wi->info;
2353 if (wi->is_lhs)
2354 return NULL_TREE;
2356 if (TREE_CODE (*tp) == SSA_NAME
2357 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2359 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2360 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2361 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2363 if (dump_enabled_p ())
2365 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2366 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2368 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2372 return NULL_TREE;
2375 static tree
2376 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2377 walk_stmt_info *)
2379 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2380 /* If the stmt is in a SLP instance then this isn't a reason
2381 to mark use definitions in other SLP instances as hybrid. */
2382 if (! STMT_SLP_TYPE (use_vinfo)
2383 && (STMT_VINFO_RELEVANT (use_vinfo)
2384 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2385 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2386 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2388 else
2389 *handled = true;
2390 return NULL_TREE;
2393 /* Find stmts that must be both vectorized and SLPed. */
2395 void
2396 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2398 unsigned int i;
2399 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2400 slp_instance instance;
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2404 "\n");
2406 /* First walk all pattern stmts in the loop and mark the defs of their uses
2407 as hybrid, because immediate uses inside pattern stmts are not recorded. */
2408 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2410 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2411 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2412 gsi_next (&gsi))
2414 gimple *stmt = gsi_stmt (gsi);
2415 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2416 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2418 walk_stmt_info wi;
2419 memset (&wi, 0, sizeof (wi));
2420 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2421 gimple_stmt_iterator gsi2
2422 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2423 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2424 vect_detect_hybrid_slp_1, &wi);
2425 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2426 vect_detect_hybrid_slp_2,
2427 vect_detect_hybrid_slp_1, &wi);
2432 /* Then walk the SLP instance trees marking stmts with uses in
2433 non-SLP stmts as hybrid, also propagating hybrid down the
2434 SLP tree, collecting the above info on-the-fly. */
2435 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2437 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2438 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2439 i, pure_slp);
2444 /* Initialize a bb_vec_info struct for the statements between
2445 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2447 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2448 gimple_stmt_iterator region_end_in)
2449 : vec_info (vec_info::bb, init_cost (NULL)),
2450 bb (gsi_bb (region_begin_in)),
2451 region_begin (region_begin_in),
2452 region_end (region_end_in)
2454 gimple_stmt_iterator gsi;
2456 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2457 gsi_next (&gsi))
2459 gimple *stmt = gsi_stmt (gsi);
2460 gimple_set_uid (stmt, 0);
2461 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2464 bb->aux = this;
2468 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2469 stmts in the basic block. */
2471 _bb_vec_info::~_bb_vec_info ()
2473 for (gimple_stmt_iterator si = region_begin;
2474 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2476 gimple *stmt = gsi_stmt (si);
2477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2479 if (stmt_info)
2480 /* Free stmt_vec_info. */
2481 free_stmt_vec_info (stmt);
2483 /* Reset region marker. */
2484 gimple_set_uid (stmt, -1);
2487 bb->aux = NULL;
2490 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2491 given that child nodes have already been processed, and that
2492 their def types currently match their SLP node's def type. */
2494 static bool
2495 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2496 slp_instance node_instance,
2497 stmt_vector_for_cost *cost_vec)
2499 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2500 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2501 gcc_assert (stmt_info);
2502 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2504 /* For BB vectorization vector types are assigned here.
2505 Memory accesses already got their vector type assigned
2506 in vect_analyze_data_refs. */
2507 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2508 if (bb_vinfo
2509 && ! STMT_VINFO_DATA_REF (stmt_info))
2511 tree vectype, nunits_vectype;
2512 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2513 &nunits_vectype))
2514 /* We checked this when building the node. */
2515 gcc_unreachable ();
2516 if (vectype == boolean_type_node)
2518 vectype = vect_get_mask_type_for_stmt (stmt_info);
2519 if (!vectype)
2520 /* vect_get_mask_type_for_stmt has already explained the
2521 failure. */
2522 return false;
2525 gimple *sstmt;
2526 unsigned int i;
2527 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2528 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2531 /* Calculate the number of vector statements to be created for the
2532 scalar stmts in this node. For SLP reductions it is equal to the
2533 number of vector statements in the children (which has already been
2534 calculated by the recursive call). Otherwise it is the number of
2535 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2536 VF divided by the number of elements in a vector. */
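/* Concrete illustration (made-up numbers): with VF == 4, a group of 2
   scalar stmts per iteration and a V4SI vectype, the node needs
   4 * 2 / 4 == 2 vector stmts.  */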
2537 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2538 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2539 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2540 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2541 else
2543 poly_uint64 vf;
2544 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2545 vf = loop_vinfo->vectorization_factor;
2546 else
2547 vf = 1;
2548 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2551 = vect_get_num_vectors (vf * group_size, vectype);
2554 bool dummy;
2555 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2558 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2559 the subtree. NODE_INSTANCE is the SLP instance containing NODE.
2561 Return true if the operations are supported. */
2563 static bool
2564 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2565 slp_instance node_instance,
2566 scalar_stmts_to_slp_tree_map_t *visited,
2567 scalar_stmts_to_slp_tree_map_t *lvisited,
2568 stmt_vector_for_cost *cost_vec)
2570 int i, j;
2571 slp_tree child;
2573 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2574 return true;
2576 /* If we already analyzed the exact same set of scalar stmts we're done.
2577 We share the generated vector stmts for those. */
2578 slp_tree *leader;
2579 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2580 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2582 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2583 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2584 return true;
2587 /* The SLP graph is acyclic, so it is safe not to cache whether we
2588 failed or succeeded: we throw away the lvisited set
2589 when we fail. */
2590 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2592 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2593 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2594 visited, lvisited, cost_vec))
2595 return false;
2597 /* Push SLP node def-type to stmt operands. */
2598 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2599 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2600 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2601 = SLP_TREE_DEF_TYPE (child);
2602 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2603 cost_vec);
2604 /* Restore def-types. */
2605 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2606 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2607 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2608 = vect_internal_def;
2609 if (! res)
2610 return false;
2612 return true;
2616 /* Analyze statements in SLP instances of VINFO. Return true if the
2617 operations are supported. */
2619 bool
2620 vect_slp_analyze_operations (vec_info *vinfo)
2622 slp_instance instance;
2623 int i;
2625 if (dump_enabled_p ())
2626 dump_printf_loc (MSG_NOTE, vect_location,
2627 "=== vect_slp_analyze_operations ===\n");
2629 scalar_stmts_to_slp_tree_map_t *visited
2630 = new scalar_stmts_to_slp_tree_map_t ();
2631 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2633 scalar_stmts_to_slp_tree_map_t lvisited;
2634 stmt_vector_for_cost cost_vec;
2635 cost_vec.create (2);
2636 if (!vect_slp_analyze_node_operations (vinfo,
2637 SLP_INSTANCE_TREE (instance),
2638 instance, visited, &lvisited,
2639 &cost_vec))
2641 dump_printf_loc (MSG_NOTE, vect_location,
2642 "removing SLP instance operations starting from: ");
2643 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2644 SLP_TREE_SCALAR_STMTS
2645 (SLP_INSTANCE_TREE (instance))[0], 0);
2646 vect_free_slp_instance (instance);
2647 vinfo->slp_instances.ordered_remove (i);
2648 cost_vec.release ();
2650 else
2652 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2653 x != lvisited.end(); ++x)
2654 visited->put ((*x).first.copy (), (*x).second);
2655 i++;
2657 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2658 cost_vec.release ();
2661 delete visited;
2663 return !vinfo->slp_instances.is_empty ();
2667 /* Compute the scalar cost of the SLP node NODE and its children
2668 and record it in COST_VEC. Do not account defs that are marked in
2669 LIFE and update LIFE according to uses of NODE. */
2671 static void
2672 vect_bb_slp_scalar_cost (basic_block bb,
2673 slp_tree node, vec<bool, va_heap> *life,
2674 stmt_vector_for_cost *cost_vec)
2676 unsigned i;
2677 gimple *stmt;
2678 slp_tree child;
2680 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2682 ssa_op_iter op_iter;
2683 def_operand_p def_p;
2684 stmt_vec_info stmt_info;
2686 if ((*life)[i])
2687 continue;
2689 /* If there is a non-vectorized use of the defs then the scalar
2690 stmt is kept live in which case we do not account it or any
2691 required defs in the SLP children in the scalar cost. This
2692 way we make the vectorization more costly when compared to
2693 the scalar cost. */
2694 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2696 imm_use_iterator use_iter;
2697 gimple *use_stmt;
2698 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2699 if (!is_gimple_debug (use_stmt)
2700 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2701 use_stmt)
2702 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2704 (*life)[i] = true;
2705 BREAK_FROM_IMM_USE_STMT (use_iter);
2708 if ((*life)[i])
2709 continue;
2711 /* Count scalar stmts only once. */
2712 if (gimple_visited_p (stmt))
2713 continue;
2714 gimple_set_visited (stmt, true);
2716 stmt_info = vinfo_for_stmt (stmt);
2717 vect_cost_for_stmt kind;
2718 if (STMT_VINFO_DATA_REF (stmt_info))
2720 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2721 kind = scalar_load;
2722 else
2723 kind = scalar_store;
2725 else
2726 kind = scalar_stmt;
2727 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2730 auto_vec<bool, 20> subtree_life;
2731 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2733 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2735 /* Do not directly pass LIFE to the recursive call, copy it to
2736 confine changes in the callee to the current child/subtree. */
2737 subtree_life.safe_splice (*life);
2738 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2739 subtree_life.truncate (0);
2744 /* Check if vectorization of the basic block is profitable. */
2746 static bool
2747 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2749 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2750 slp_instance instance;
2751 int i;
2752 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2753 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2755 /* Calculate scalar cost. */
2756 stmt_vector_for_cost scalar_costs;
2757 scalar_costs.create (0);
2758 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2760 auto_vec<bool, 20> life;
2761 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2762 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2763 SLP_INSTANCE_TREE (instance),
2764 &life, &scalar_costs);
2766 void *target_cost_data = init_cost (NULL);
2767 add_stmt_costs (target_cost_data, &scalar_costs);
2768 scalar_costs.release ();
2769 unsigned dummy;
2770 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2771 destroy_cost_data (target_cost_data);
2773 /* Unset visited flag. */
2774 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2775 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2776 gimple_set_visited (gsi_stmt (gsi), false);
2778 /* Complete the target-specific cost calculation. */
2779 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2780 &vec_inside_cost, &vec_epilogue_cost);
2782 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2784 if (dump_enabled_p ())
2786 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2787 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2788 vec_inside_cost);
2789 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2790 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2791 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2794 /* Vectorization is profitable if its cost is less than the cost of the
2795 scalar version. Note that we err on the vector side for equal cost because
2796 the cost estimate is otherwise quite pessimistic (constant uses are
2797 free on the scalar side but cost a load on the vector side for
2798 example). */
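/* Numeric illustration (hypothetical costs): with vec_prologue == 2,
   vec_epilogue == 0, vec_inside == 8 and scalar_cost == 10, the test
   10 > 10 fails and we vectorize; with scalar_cost == 9 we would
   give up.  */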
2799 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2800 return false;
2802 return true;
2805 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2806 if so and sets FATAL to true if failure is independent of
2807 current_vector_size. */
2809 static bb_vec_info
2810 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2811 gimple_stmt_iterator region_end,
2812 vec<data_reference_p> datarefs, int n_stmts,
2813 bool &fatal)
2815 bb_vec_info bb_vinfo;
2816 slp_instance instance;
2817 int i;
2818 poly_uint64 min_vf = 2;
2820 /* The first group of checks is independent of the vector size. */
2821 fatal = true;
2823 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2825 if (dump_enabled_p ())
2826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2827 "not vectorized: too many instructions in "
2828 "basic block.\n");
2829 free_data_refs (datarefs);
2830 return NULL;
2833 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2834 if (!bb_vinfo)
2835 return NULL;
2837 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2839 /* Analyze the data references. */
2841 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2843 if (dump_enabled_p ())
2844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2845 "not vectorized: unhandled data-ref in basic "
2846 "block.\n");
2848 delete bb_vinfo;
2849 return NULL;
2852 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2854 if (dump_enabled_p ())
2855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2856 "not vectorized: not enough data-refs in "
2857 "basic block.\n");
2859 delete bb_vinfo;
2860 return NULL;
2863 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2865 if (dump_enabled_p ())
2866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2867 "not vectorized: unhandled data access in "
2868 "basic block.\n");
2870 delete bb_vinfo;
2871 return NULL;
2874 /* If there are no grouped stores in the region there is no need
2875 to continue with pattern recog as vect_analyze_slp will fail
2876 anyway. */
2877 if (bb_vinfo->grouped_stores.is_empty ())
2879 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2881 "not vectorized: no grouped stores in "
2882 "basic block.\n");
2884 delete bb_vinfo;
2885 return NULL;
2888 /* The rest of the analysis below depends on the vector size in some way. */
2889 fatal = false;
2891 vect_pattern_recog (bb_vinfo);
2893 /* Check the SLP opportunities in the basic block, analyze and build SLP
2894 trees. */
2895 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2897 if (dump_enabled_p ())
2899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2900 "Failed to SLP the basic block.\n");
2901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2902 "not vectorized: failed to find SLP opportunities "
2903 "in basic block.\n");
2906 delete bb_vinfo;
2907 return NULL;
2910 vect_record_base_alignments (bb_vinfo);
2912 /* Analyze and verify the alignment of data references and the
2913 dependence in the SLP instances. */
2914 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2916 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2917 || ! vect_slp_analyze_instance_dependence (instance))
2919 dump_printf_loc (MSG_NOTE, vect_location,
2920 "removing SLP instance operations starting from: ");
2921 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2922 SLP_TREE_SCALAR_STMTS
2923 (SLP_INSTANCE_TREE (instance))[0], 0);
2924 vect_free_slp_instance (instance);
2925 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2926 continue;
2929 /* Mark all the statements that we want to vectorize as pure SLP and
2930 relevant. */
2931 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2932 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2934 i++;
2936 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2938 delete bb_vinfo;
2939 return NULL;
2942 if (!vect_slp_analyze_operations (bb_vinfo))
2944 if (dump_enabled_p ())
2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2946 "not vectorized: bad operation in basic block.\n");
2948 delete bb_vinfo;
2949 return NULL;
2952 /* Cost model: check if the vectorization is worthwhile. */
2953 if (!unlimited_cost_model (NULL)
2954 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2956 if (dump_enabled_p ())
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2958 "not vectorized: vectorization is not "
2959 "profitable.\n");
2961 delete bb_vinfo;
2962 return NULL;
2965 if (dump_enabled_p ())
2966 dump_printf_loc (MSG_NOTE, vect_location,
2967 "Basic block will be vectorized using SLP\n");
2969 return bb_vinfo;
2973 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2974 true if anything in the basic-block was vectorized. */
2976 bool
2977 vect_slp_bb (basic_block bb)
2979 bb_vec_info bb_vinfo;
2980 gimple_stmt_iterator gsi;
2981 bool any_vectorized = false;
2982 auto_vector_sizes vector_sizes;
2984 if (dump_enabled_p ())
2985 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2987 /* Autodetect the first vector size we try. */
2988 current_vector_size = 0;
2989 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2990 unsigned int next_size = 0;
2992 gsi = gsi_start_bb (bb);
2994 poly_uint64 autodetected_vector_size = 0;
2995 while (1)
2997 if (gsi_end_p (gsi))
2998 break;
3000 gimple_stmt_iterator region_begin = gsi;
3001 vec<data_reference_p> datarefs = vNULL;
3002 int insns = 0;
3004 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3006 gimple *stmt = gsi_stmt (gsi);
3007 if (is_gimple_debug (stmt))
3008 continue;
3009 insns++;
3011 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3012 vect_location = gimple_location (stmt);
3014 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3015 break;
3018 /* Skip leading unhandled stmts. */
3019 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3021 gsi_next (&gsi);
3022 continue;
3025 gimple_stmt_iterator region_end = gsi;
3027 bool vectorized = false;
3028 bool fatal = false;
3029 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3030 datarefs, insns, fatal);
3031 if (bb_vinfo
3032 && dbg_cnt (vect_slp))
3034 if (dump_enabled_p ())
3035 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3037 vect_schedule_slp (bb_vinfo);
3039 if (dump_enabled_p ())
3040 dump_printf_loc (MSG_NOTE, vect_location,
3041 "basic block part vectorized\n");
3043 vectorized = true;
3045 delete bb_vinfo;
3047 any_vectorized |= vectorized;
3049 if (next_size == 0)
3050 autodetected_vector_size = current_vector_size;
3052 if (next_size < vector_sizes.length ()
3053 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3054 next_size += 1;
3056 if (vectorized
3057 || next_size == vector_sizes.length ()
3058 || known_eq (current_vector_size, 0U)
3059 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3060 vector sizes will fail do not bother iterating. */
3061 || fatal)
3063 if (gsi_end_p (region_end))
3064 break;
3066 /* Skip the unhandled stmt. */
3067 gsi_next (&gsi);
3069 /* And reset vector sizes. */
3070 current_vector_size = 0;
3071 next_size = 0;
3073 else
3075 /* Try the next biggest vector size. */
3076 current_vector_size = vector_sizes[next_size++];
3077 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_NOTE, vect_location,
3080 "***** Re-trying analysis with "
3081 "vector size ");
3082 dump_dec (MSG_NOTE, current_vector_size);
3083 dump_printf (MSG_NOTE, "\n");
3086 /* Start over. */
3087 gsi = region_begin;
3091 return any_vectorized;
3095 /* Return true if the vector type of the boolean constant that is
3096 operand OPNUM of statement STMT is a boolean vector. */
3098 static bool
3099 vect_mask_constant_operand_p (gimple *stmt, int opnum)
3101 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3102 enum tree_code code = gimple_expr_code (stmt);
3103 tree op, vectype;
3104 gimple *def_stmt;
3105 enum vect_def_type dt;
3107 /* For comparisons and COND_EXPRs the type is chosen depending
3108 on the other comparison operand. */
3109 if (TREE_CODE_CLASS (code) == tcc_comparison)
3111 if (opnum)
3112 op = gimple_assign_rhs1 (stmt);
3113 else
3114 op = gimple_assign_rhs2 (stmt);
3116 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3117 &dt, &vectype))
3118 gcc_unreachable ();
3120 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3123 if (code == COND_EXPR)
3125 tree cond = gimple_assign_rhs1 (stmt);
3127 if (TREE_CODE (cond) == SSA_NAME)
3128 op = cond;
3129 else if (opnum)
3130 op = TREE_OPERAND (cond, 1);
3131 else
3132 op = TREE_OPERAND (cond, 0);
3134 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3135 &dt, &vectype))
3136 gcc_unreachable ();
3138 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3141 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3144 /* Build a variable-length vector in which the elements in ELTS are repeated
3145 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3146 RESULTS and add any new instructions to SEQ.
3148 The approach we use is:
3150 (1) Find a vector mode VM with integer elements of mode IM.
3152 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3153 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3154 from small vectors to IM.
3156 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3158 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3159 correct byte contents.
3161 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3163 We try to find the largest IM for which this sequence works, in order
3164 to cut down on the number of interleaves. */
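/* A sketch on made-up parameters: for ELTS == {a, b, c, d} of HImode
   and a variable-length result, suppose the target makes us pick
   IM == SImode and NELTS' == 2.  {a, b} and {c, d} are each built as a
   V2HI, view-converted to one SImode scalar and splatted into a vector
   of SIs (steps 2 and 3); one level of interleaving VEC_PERM_EXPRs then
   yields the byte pattern abcdabcd... (step 4), which the final
   VIEW_CONVERT_EXPR reinterprets as the required HI vector (step 5).  */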
3166 void
3167 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3168 unsigned int nresults, vec<tree> &results)
3170 unsigned int nelts = elts.length ();
3171 tree element_type = TREE_TYPE (vector_type);
3173 /* (1) Find a vector mode VM with integer elements of mode IM. */
3174 unsigned int nvectors = 1;
3175 tree new_vector_type;
3176 tree permutes[2];
3177 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3178 &nvectors, &new_vector_type,
3179 permutes))
3180 gcc_unreachable ();
3182 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3183 unsigned int partial_nelts = nelts / nvectors;
3184 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3186 tree_vector_builder partial_elts;
3187 auto_vec<tree, 32> pieces (nvectors * 2);
3188 pieces.quick_grow (nvectors * 2);
3189 for (unsigned int i = 0; i < nvectors; ++i)
3191 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3192 ELTS' has mode IM. */
3193 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3194 for (unsigned int j = 0; j < partial_nelts; ++j)
3195 partial_elts.quick_push (elts[i * partial_nelts + j]);
3196 tree t = gimple_build_vector (seq, &partial_elts);
3197 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3198 TREE_TYPE (new_vector_type), t);
3200 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3201 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3204 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3205 correct byte contents.
3207 We need to repeat the following operation log2(nvectors) times:
3209 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3210 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3212 However, if each input repeats every N elements and the VF is
3213 a multiple of N * 2, the HI result is the same as the LO. */
3214 unsigned int in_start = 0;
3215 unsigned int out_start = nvectors;
3216 unsigned int hi_start = nvectors / 2;
3217 /* A bound on the number of outputs needed to produce NRESULTS results
3218 in the final iteration. */
3219 unsigned int noutputs_bound = nvectors * nresults;
3220 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3222 noutputs_bound /= 2;
3223 unsigned int limit = MIN (noutputs_bound, nvectors);
3224 for (unsigned int i = 0; i < limit; ++i)
3226 if ((i & 1) != 0
3227 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3228 2 * in_repeat))
3230 pieces[out_start + i] = pieces[out_start + i - 1];
3231 continue;
3234 tree output = make_ssa_name (new_vector_type);
3235 tree input1 = pieces[in_start + (i / 2)];
3236 tree input2 = pieces[in_start + (i / 2) + hi_start];
3237 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3238 input1, input2,
3239 permutes[i & 1]);
3240 gimple_seq_add_stmt (seq, stmt);
3241 pieces[out_start + i] = output;
3243 std::swap (in_start, out_start);
3246 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3247 results.reserve (nresults);
3248 for (unsigned int i = 0; i < nresults; ++i)
3249 if (i < nvectors)
3250 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3251 pieces[in_start + i]));
3252 else
3253 results.quick_push (results[i - nvectors]);
3257 /* For constant and loop invariant defs of SLP_NODE this function returns
3258 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3259 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3260 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
3264 static void
3265 vect_get_constant_vectors (tree op, slp_tree slp_node,
3266 vec<tree> *vec_oprnds,
3267 unsigned int op_num, unsigned int number_of_vectors)
3269 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3270 gimple *stmt = stmts[0];
3271 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3272 unsigned HOST_WIDE_INT nunits;
3273 tree vec_cst;
3274 unsigned j, number_of_places_left_in_vector;
3275 tree vector_type;
3276 tree vop;
3277 int group_size = stmts.length ();
3278 unsigned int vec_num, i;
3279 unsigned number_of_copies = 1;
3280 vec<tree> voprnds;
3281 voprnds.create (number_of_vectors);
3282 bool constant_p, is_store;
3283 tree neutral_op = NULL;
3284 enum tree_code code = gimple_expr_code (stmt);
3285 gimple_seq ctor_seq = NULL;
3286 auto_vec<tree, 16> permute_results;
3288 /* Check if vector type is a boolean vector. */
3289 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3290 && vect_mask_constant_operand_p (stmt, op_num))
3291 vector_type
3292 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3293 else
3294 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3296 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3298 is_store = true;
3299 op = gimple_assign_rhs1 (stmt);
3301 else
3302 is_store = false;
3304 gcc_assert (op);
3306 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3307 created vectors. It is greater than 1 if unrolling is performed.
3309 For example, we have two scalar operands, s1 and s2 (e.g., group of
3310 strided accesses of size two), while NUNITS is four (i.e., four scalars
3311 of this type can be packed in a vector). The output vector will contain
3312 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3313 will be 2).
3315 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3316 containing the operands.
3318 For example, NUNITS is four as before, and the group size is 8
3319 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3320 {s5, s6, s7, s8}. */
3322 /* When using duplicate_and_interleave, we just need one element for
3323 each scalar statement. */
3324 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3325 nunits = group_size;
3327 number_of_copies = nunits * number_of_vectors / group_size;
3329 number_of_places_left_in_vector = nunits;
3330 constant_p = true;
3331 tree_vector_builder elts (vector_type, nunits, 1);
3332 elts.quick_grow (nunits);
3333 bool place_after_defs = false;
3334 for (j = 0; j < number_of_copies; j++)
3336 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3338 if (is_store)
3339 op = gimple_assign_rhs1 (stmt);
3340 else
3342 switch (code)
3344 case COND_EXPR:
3346 tree cond = gimple_assign_rhs1 (stmt);
3347 if (TREE_CODE (cond) == SSA_NAME)
3348 op = gimple_op (stmt, op_num + 1);
3349 else if (op_num == 0 || op_num == 1)
3350 op = TREE_OPERAND (cond, op_num);
3351 else
3353 if (op_num == 2)
3354 op = gimple_assign_rhs2 (stmt);
3355 else
3356 op = gimple_assign_rhs3 (stmt);
3359 break;
3361 case CALL_EXPR:
3362 op = gimple_call_arg (stmt, op_num);
3363 break;
3365 case LSHIFT_EXPR:
3366 case RSHIFT_EXPR:
3367 case LROTATE_EXPR:
3368 case RROTATE_EXPR:
3369 op = gimple_op (stmt, op_num + 1);
3370 /* Unlike the other binary operators, shifts/rotates take an
3371 int shift count instead of the same type as the LHS, so
3372 make sure the scalar is the right type if we are
3373 dealing with vectors of
3374 long long/long/short/char. */
3375 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3376 op = fold_convert (TREE_TYPE (vector_type), op);
3377 break;
3379 default:
3380 op = gimple_op (stmt, op_num + 1);
3381 break;
3385 /* Create 'vect_ = {op0,op1,...,opn}'. */
3386 number_of_places_left_in_vector--;
3387 tree orig_op = op;
3388 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3390 if (CONSTANT_CLASS_P (op))
3392 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3394 /* Can't use VIEW_CONVERT_EXPR for booleans because
3395 of possibly different sizes of scalar value and
3396 vector element. */
3397 if (integer_zerop (op))
3398 op = build_int_cst (TREE_TYPE (vector_type), 0);
3399 else if (integer_onep (op))
3400 op = build_all_ones_cst (TREE_TYPE (vector_type));
3401 else
3402 gcc_unreachable ();
3404 else
3405 op = fold_unary (VIEW_CONVERT_EXPR,
3406 TREE_TYPE (vector_type), op);
3407 gcc_assert (op && CONSTANT_CLASS_P (op));
3409 else
3411 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3412 gimple *init_stmt;
3413 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3415 tree true_val
3416 = build_all_ones_cst (TREE_TYPE (vector_type));
3417 tree false_val
3418 = build_zero_cst (TREE_TYPE (vector_type));
3419 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3420 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3421 op, true_val,
3422 false_val);
3424 else
3426 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3427 op);
3428 init_stmt
3429 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3430 op);
3432 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3433 op = new_temp;
3436 elts[number_of_places_left_in_vector] = op;
3437 if (!CONSTANT_CLASS_P (op))
3438 constant_p = false;
3439 if (TREE_CODE (orig_op) == SSA_NAME
3440 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3441 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3442 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3443 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3444 place_after_defs = true;
3446 if (number_of_places_left_in_vector == 0)
3448 if (constant_p
3449 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3450 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3451 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3452 else
3454 if (vec_oprnds->is_empty ())
3455 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3456 number_of_vectors,
3457 permute_results);
3458 vec_cst = permute_results[number_of_vectors - j - 1];
3460 tree init;
3461 gimple_stmt_iterator gsi;
3462 if (place_after_defs)
3464 gsi = gsi_for_stmt
3465 (vect_find_last_scalar_stmt_in_slp (slp_node));
3466 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3468 else
3469 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3470 if (ctor_seq != NULL)
3472 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3473 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3474 ctor_seq = NULL;
3476 voprnds.quick_push (init);
3477 place_after_defs = false;
3478 number_of_places_left_in_vector = nunits;
3479 constant_p = true;
3480 elts.new_vector (vector_type, nunits, 1);
3481 elts.quick_grow (nunits);
3486 /* Since the vectors are created in the reverse order, we should invert
3487 them. */
3488 vec_num = voprnds.length ();
3489 for (j = vec_num; j != 0; j--)
3491 vop = voprnds[j - 1];
3492 vec_oprnds->quick_push (vop);
3495 voprnds.release ();
3497 /* If VF is greater than the unrolling factor needed for the SLP
3498 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
3499 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3500 to replicate the vectors. */
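/* E.g. (made-up): if four vector defs are needed but only two were
   built above, the loop below re-pushes the existing two in order
   until enough defs exist; a neutral-value vector would be used
   instead if NEUTRAL_OP were set.  */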
3501 while (number_of_vectors > vec_oprnds->length ())
3503 tree neutral_vec = NULL;
3505 if (neutral_op)
3507 if (!neutral_vec)
3508 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3510 vec_oprnds->quick_push (neutral_vec);
3512 else
3514 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3515 vec_oprnds->quick_push (vop);
3521 /* Get vectorized definitions from SLP_NODE that contains corresponding
3522 vectorized def-stmts. */
3524 static void
3525 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3527 tree vec_oprnd;
3528 gimple *vec_def_stmt;
3529 unsigned int i;
3531 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3533 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3535 gcc_assert (vec_def_stmt);
3536 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3537 vec_oprnd = gimple_phi_result (vec_def_stmt);
3538 else
3539 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3540 vec_oprnds->quick_push (vec_oprnd);
3545 /* Get vectorized definitions for SLP_NODE.
3546 If the scalar definitions are loop invariants or constants, collect them and
3547 call vect_get_constant_vectors () to create vector stmts.
3548 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3549 must be stored in the corresponding child of SLP_NODE, and we call
3550 vect_get_slp_vect_defs () to retrieve them. */
3552 void
3553 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3554 vec<vec<tree> > *vec_oprnds)
3556 gimple *first_stmt;
3557 int number_of_vects = 0, i;
3558 unsigned int child_index = 0;
3559 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3560 slp_tree child = NULL;
3561 vec<tree> vec_defs;
3562 tree oprnd;
3563 bool vectorized_defs;
3565 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3566 FOR_EACH_VEC_ELT (ops, i, oprnd)
3568 /* For each operand we check if it has vectorized definitions in a child
3569 node or we need to create them (for invariants and constants). We
3570 check if the LHS of the first stmt of the next child matches OPRND.
3571 If it does, we found the correct child. Otherwise, we call
3572 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
3573 this child node is checked again for the next operand. */
3574 vectorized_defs = false;
3575 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3577 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3579 /* We have to check both pattern and original def, if available. */
3580 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3582 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3583 gimple *related
3584 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3585 tree first_def_op;
3587 if (gimple_code (first_def) == GIMPLE_PHI)
3588 first_def_op = gimple_phi_result (first_def);
3589 else
3590 first_def_op = gimple_get_lhs (first_def);
3591 if (operand_equal_p (oprnd, first_def_op, 0)
3592 || (related
3593 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3595 /* The number of vector defs is determined by the number of
3596 vector statements in the node from which we get those
3597 statements. */
3598 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3599 vectorized_defs = true;
3600 child_index++;
3603 else
3604 child_index++;
3607 if (!vectorized_defs)
3609 if (i == 0)
3611 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3612 /* Number of vector stmts was calculated according to LHS in
3613 vect_schedule_slp_instance (), fix it by replacing LHS with
3614 RHS, if necessary. See vect_get_smallest_scalar_type () for
3615 details. */
3616 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3617 &rhs_size_unit);
3618 if (rhs_size_unit != lhs_size_unit)
3620 number_of_vects *= rhs_size_unit;
3621 number_of_vects /= lhs_size_unit;
3626 /* Allocate memory for vectorized defs. */
3627 vec_defs = vNULL;
3628 vec_defs.create (number_of_vects);
3630 /* For reduction defs we call vect_get_constant_vectors (), since we are
3631 looking for initial loop invariant values. */
3632 if (vectorized_defs)
3633 /* The defs are already vectorized. */
3634 vect_get_slp_vect_defs (child, &vec_defs);
3635 else
3636 /* Build vectors from scalar defs. */
3637 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3638 number_of_vects);
3640 vec_oprnds->quick_push (vec_defs);
3644 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3645 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3646 permute statements for the SLP node NODE of the SLP instance
3647 SLP_NODE_INSTANCE. */
3649 bool
3650 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3651 gimple_stmt_iterator *gsi, poly_uint64 vf,
3652 slp_instance slp_node_instance, bool analyze_only,
3653 unsigned *n_perms)
3655 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3656 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3657 tree mask_element_type = NULL_TREE, mask_type;
3658 int vec_index = 0;
3659 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3660 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3661 unsigned int mask_element;
3662 machine_mode mode;
3663 unsigned HOST_WIDE_INT nunits, const_vf;
3665 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3666 return false;
3668 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
3670 mode = TYPE_MODE (vectype);
3672 /* At the moment, all permutations are represented using per-element
3673 indices, so we can't cope with variable vector lengths or
3674 vectorization factors. */
3675 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3676 || !vf.is_constant (&const_vf))
3677 return false;
3679 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3680 same size as the vector element being permuted. */
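/* E.g. (illustrative): when permuting V8HI vectors the mask elements
   are HImode-sized integers, so the mask type is likewise an
   8-element vector of 16-bit ints.  */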
3681 mask_element_type = lang_hooks.types.type_for_mode
3682 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3683 mask_type = get_vectype_for_scalar_type (mask_element_type);
3684 vec_perm_builder mask (nunits, nunits, 1);
3685 mask.quick_grow (nunits);
3686 vec_perm_indices indices;
3688 /* Initialize the vect stmts of NODE to properly insert the generated
3689 stmts later. */
3690 if (! analyze_only)
3691 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3692 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3693 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3695 /* Generate permutation masks for every NODE. Number of masks for each NODE
3696 is equal to GROUP_SIZE.
3697 E.g., we have a group of three nodes with three loads from the same
3698 location in each node, and the vector size is 4. I.e., we have an
3699 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3700 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3701 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3704 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3705 The last mask is illegal since we assume two operands for permute
3706 operation, and the mask element values can't be outside that range.
3707 Hence, the last mask must be converted into {2,5,5,5}.
3708 For the first two permutations we need the first and the second input
3709 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3710 we need the second and the third vectors: {b1,c1,a2,b2} and
3711 {c2,a3,b3,c3}. */
3713 int vect_stmts_counter = 0;
3714 unsigned int index = 0;
3715 int first_vec_index = -1;
3716 int second_vec_index = -1;
3717 bool noop_p = true;
3718 *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
	{
	  unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
			    + j * DR_GROUP_SIZE (stmt_info));
	  vec_index = i / nunits;
	  mask_element = i % nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      mask_element += nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "permutation requires at "
				   "least three vectors ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt, 0);
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * nunits);
	  if (mask_element != index)
	    noop_p = false;
	  mask[index++] = mask_element;
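
	  /* A full vector's worth of mask elements has been collected once
	     INDEX reaches NUNITS; an identity mask (noop_p still true)
	     needs no permute statement, otherwise check target support.  */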
	  if (index == nunits && !noop_p)
	    {
	      indices.new_vector (mask, 2, nunits);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (i = 0; i < nunits; ++i)
			{
			  dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, " ");
			}
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  gcc_assert (analyze_only);
		  return false;
		}

	      ++*n_perms;
	    }

	  if (index == nunits)
	    {
	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;

		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt = gimple_build_assign (perm_dest,
						       VEC_PERM_EXPR,
						       first_vec, second_vec,
						       mask_vec);
		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}

	      index = 0;
	      first_vec_index = -1;
	      second_vec_index = -1;
	      noop_p = true;
	    }
	}
    }

  return true;
}

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return false;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
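
  /* For example (a sketch): for a store group { *p = a_1; *(p + 1) = b_2; }
     the vector store is emitted just before "*(p + 1) = b_2;", the point
     at which both scalar operands are known to be available.  */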

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
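  /* For instance (a sketch): a group {a0 + b0, a1 - b1, a2 + b2, a3 - b3}
     is vectorized once with all lanes doing PLUS_EXPR and once with all
     lanes doing MINUS_EXPR; the 0/1 mask built below ({0,1,0,1} here)
     records, per lane, which of the two results the final merge takes.  */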
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
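
	  /* Expand the scalar 0/1 mask into a constant VEC_PERM_EXPR mask:
	     lane L takes element L of the first input when the bit is 0 and
	     element const_nunits + L of the second input when it is 1,
	     with K wrapping around the group.  */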
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */
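
/* For example (a sketch, not taken from the sources): a pure-SLP scalar
   call "x_1 = powf (a_2, b_3)" is rewritten to "x_1 = 0.0f"; the SSA name
   keeps a valid definition, and DCE can remove it once the vectorized
   code has taken over all of its uses.  */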

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
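
  /* BST_MAP is shared across all instances so that SLP nodes covering the
     same set of scalar stmts, possibly reached from different instances,
     are vectorized only once; see the reuse check at the start of
     vect_schedule_slp_instance.  */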
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);
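
      /* The scalar grouped stores are now redundant, their job being done
	 by the vector stores generated above, so unlink and release them,
	 stopping at the first group member without a data reference.  */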
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);
	}
    }

  return is_store;