/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
          STMT_VINFO_NUM_SLP_USES (stmt_info)--;
        }
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
/* Create an SLP node for SCALAR_STMTS.  */
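/* As an illustrative example, a COND_EXPR assignment like
   x_1 = a_2 < b_3 ? c_4 : d_5 has gimple_num_ops == 4, so the
   computation below yields three operands and is then bumped to four
   because the embedded comparison contributes two operands.  */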
static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
                                      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
        return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
        result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
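/* Worked example (illustrative): for NELTS == 4 the series built
   below expand to indices1 = { 0, 4, 1, 5 } and indices2 = { 2, 6, 3, 7 },
   i.e. the interleavings of the low halves and of the high halves of
   two input vectors.  */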
bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
                                unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
          && int_mode_for_size (elt_bits, 0).exists (&int_mode))
        {
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type = build_vector_type (int_type, nelts);
          if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
            {
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to a non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */
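/* As an illustrative example, for x_1 = a_2 >= b_3 ? c_4 : d_5 a *SWAP
   of 1 selects the comparison operands in the order b_3, a_2 (maps[1]
   below), while a *SWAP of 2 keeps the comparison operands but
   exchanges the arms c_4 and d_5 (maps[2] below).  */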
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<stmt_vec_info> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          commutative_op = first_commutative_argument (ifn);
        }
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code from the first
         stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
                                             first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt_info->stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: can't analyze def for %T\n",
                             oprnd);

          return -1;
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && dt == vect_internal_def)
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == commutative_op && !swapped)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !current_vector_size.is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (stmts.length (),
                                                      TYPE_MODE (type))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: invalid type of def "
                                 "for variable-length SLP %T\n", oprnd);
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt_info);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: illegal type of def %T\n",
                             oprnd);

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: cannot swap operands of "
                             "shared stmt %G", stmt_info->stmt);
          return -1;
        }

      if (first_op_cond)
        {
          gassign *stmt = as_a <gassign *> (stmt_info->stmt);
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        {
          unsigned int op = commutative_op + first_op_idx;
          swap_ssa_operands (stmt_info->stmt,
                             gimple_op_ptr (stmt_info->stmt, op),
                             gimple_op_ptr (stmt_info->stmt, op + 1));
        }
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "swapped operands to match def types in %G",
                         stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */
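/* E.g. (illustrative) a basic-block SLP group of two stmts with a V4SI
   vector type would require unrolling (4 > 2) and is rejected below.  */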
static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unsupported data-type in %G\n",
                         stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required "
                         "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */
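/* Illustrative example: for a group { a+b, a-b, c+d, c-d } with
   four-element vectors, the mask built below is { 0, 5, 2, 7 },
   selecting the PLUS results for the even elements and the MINUS
   results for the odd ones.  */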
static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
                               unsigned int group_size, tree vectype,
                               tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to B1 <= A1 ? X1 : Y1, or be inverted to A1 < B1 ? Y1 : X1.  */
static bool
vect_build_slp_tree_1 (unsigned char *swap,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
                                           &nunits_vectype)
          || (nunits_vectype
              && !vect_record_max_nunits (stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if ((gimple_call_internal_p (call_stmt)
               && (!vectorizable_internal_fn_p
                   (gimple_call_internal_fn (call_stmt))))
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              if (vectype == boolean_type_node)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: shift of a"
                                     " boolean.\n");
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt %G", stmt);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt %G", first_stmt_info->stmt);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different shift "
                                 "arguments in %G", stmt);
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
                                       as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different calls in %G",
                                     stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: different "
                                         "interleaving chains in one node %G",
                                         stmt);
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: not grouped load %G", stmt);

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different"
                                     " operation %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
          || !vect_two_operations_perm_ok_p (stmts, group_size,
                                             vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt %G", stmts[i]->stmt);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt %G", first_stmt_info->stmt);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
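/* Note that vect_build_slp_tree below also records failed builds in
   this map as a NULL slp_tree, so a stmt set that was already rejected
   is not analyzed again.  */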
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
                         *leader ? "" : "failed ", *leader);
      if (*leader)
        (*leader)->refcnt++;
      return *leader;
    }
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        matches, npermutes, tree_size, bst_map);
  /* Keep a reference for the bst_map use.  */
  if (res)
    res->refcnt++;
  bst_map->put (stmts.copy (), res);
  return res;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return the built SLP tree; MATCHES indicates for which stmts of the
   group a mismatch, if any, was found.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (stmt_info != other_info)
              return NULL;
        }
      else if (def_type == vect_reduction_def
               || def_type == vect_double_reduction_def
               || def_type == vect_nested_cycle)
        {
          /* Else def types have to match.  */
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (REDUC_GROUP_FIRST_ELEMENT (other_info)
                  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
                continue;
              if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
                return NULL;
            }
        }
      else
        return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      *max_nunits = this_max_nunits;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        matches, npermutes,
                                        &this_tree_size, bst_map)) != NULL)
        {
          /* If we have all children of child built up from scalars then
             just throw that away and build this node up from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild, false);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Building parent vector operands from "
                                     "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  ++this_tree_size;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Building vector operands from scalars\n");
          this_tree_size++;
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt_info->stmt)
          /* Do so only if the number of not successful permutes was
             not more than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
                  if (!stmt
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                  /* Verify if we can safely swap or if we committed to a
                     specific operand order already.
                     ??? Instead of modifying GIMPLE stmts here we could
                     record whether we want to swap operands in the SLP
                     node and temporarily do that when processing it
                     (or wrap operand accessors in a helper).  */
                  else if (swap[j] != 0
                           || STMT_VINFO_NUM_SLP_USES (stmt_info))
                    {
                      if (!swap_not_matching)
                        {
                          if (dump_enabled_p ())
                            dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                             vect_location,
                                             "Build SLP failed: cannot swap "
                                             "operands of shared stmt %G",
                                             stmts[j]->stmt);
                          goto fail;
                        }
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);

          /* Swap mismatched definition stmts.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                if (dump_enabled_p ())
                  dump_printf (MSG_NOTE, "%d ", j);
              }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            tem, npermutes,
                                            &this_tree_size, bst_map)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
              for (j = 0; j < group_size; ++j)
                if (matches[j] == !swap_not_matching)
                  {
                    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
                    /* Avoid swapping operands twice.  */
                    if (gimple_plf (stmt, GF_PLF_1))
                      continue;
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                    gimple_set_plf (stmt, GF_PLF_1, true);
                  }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
                              == (matches[j] == !swap_not_matching));

              /* If we have all children of child built up from scalars then
                 just throw that away and build this node up from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild, false);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_NOTE, vect_location,
                                         "Building parent vector operands from "
                                         "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      ++this_tree_size;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "", node);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child, visited);
}

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_print_slp_tree (dump_kind, loc, node, visited);
}
/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}
/* Rearrange the statements of NODE according to PERMUTATION.  */
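/* E.g. (illustrative) with PERMUTATION { 2, 0, 1 } the scalar stmt at
   position 0 moves to slot 2, position 1 to slot 0 and position 2 to
   slot 1, since TMP_STMTS[PERMUTATION[i]] below receives stmt I.  */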
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation,
                          hash_set<slp_tree> &visited)
{
  stmt_vec_info stmt_info;
  vec<stmt_vec_info> tmp_stmts;
  unsigned int i;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation, visited);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    tmp_stmts[permutation[i]] = stmt_info;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
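/* E.g. (illustrative) if every load node of the instance carries the
   permutation { 1, 0, 3, 2 }, each index in 0..3 occurs exactly once,
   so the stmts of all nodes are reordered accordingly and the loads
   become contiguous.  */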
static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  hash_set<slp_tree> visited;
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation, visited);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == DR_GROUP_SIZE (first_stmt_info)
              && DR_GROUP_GAP (first_stmt_info) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node,
                       hash_set<slp_tree> &visited)
{
  if (visited.add (node))
    return;

  if (SLP_TREE_CHILDREN (node).length () == 0)
    {
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
          && STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
        SLP_INSTANCE_LOADS (inst).safe_push (node);
    }
  else
    {
      unsigned i;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
        vect_gather_slp_loads (inst, child, visited);
    }
}

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (inst, node, visited);
}
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */
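/* E.g. (illustrative) in basic-block SLP, loads of a[1] and a[2] from
   an interleaving chain of size four form a subchain and need no
   permutation, while loads of a[2] then a[1] keep a permutation and
   must be checked with vect_transform_slp_perm_load below.  */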
static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (stmt_info)
      && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (stmt_info))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          stmt_vec_info next_load_info = NULL;
          stmt_vec_info load_info;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              if (j != 0
                  && (next_load_info != load_info
                      || DR_GROUP_GAP (load_info) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
              group_info = DR_GROUP_FIRST_ELEMENT (group_info);
              unsigned HOST_WIDE_INT nunits;
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of DR_GROUP_SIZE.  */
              tree vectype = STMT_VINFO_VECTYPE (group_info);
              if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
                  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "BB vectorization with gaps at the end of "
                                     "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                     vect_location,
                                     "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
                             LOOP_VINFO_VECT_FACTOR
                               (STMT_VINFO_LOOP_VINFO (stmt_info)));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
                                          slp_instn, true, &n_perms))
      return false;

  return true;
}
/* Find the last scalar statement in the SLP tree rooted at NODE,
   e.g. the last store when the tree was built from grouped stores.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */
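/* E.g. (illustrative) splitting a group of eight stores with
   GROUP1_SIZE == 3 leaves groups of sizes 3 and 5: the second group's
   DR_GROUP_GAP additionally skips the three stmts of the first group,
   and the first group's gap grows by the five stmts of the second.  */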
static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */
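/* E.g. (illustrative) GROUP_SIZE == 8 with NUNITS == 3 gives
   common_multiple (3, 8) == 24 and thus an unrolling factor of 3.  */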
static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
1869 /* Analyze an SLP instance starting from a group of grouped stores. Call
1870 vect_build_slp_tree to build a tree of packed stmts if possible.
1871 Return FALSE if it's impossible to SLP any stmt in the loop. */
1873 static bool
1874 vect_analyze_slp_instance (vec_info *vinfo,
1875 stmt_vec_info stmt_info, unsigned max_tree_size)
1877 slp_instance new_instance;
1878 slp_tree node;
1879 unsigned int group_size;
1880 tree vectype, scalar_type = NULL_TREE;
1881 unsigned int i;
1882 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1883 vec<stmt_vec_info> scalar_stmts;
1885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1887 scalar_type = TREE_TYPE (DR_REF (dr));
1888 vectype = get_vectype_for_scalar_type (scalar_type);
1889 group_size = DR_GROUP_SIZE (stmt_info);
1891 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1893 gcc_assert (is_a <loop_vec_info> (vinfo));
1894 vectype = STMT_VINFO_VECTYPE (stmt_info);
1895 group_size = REDUC_GROUP_SIZE (stmt_info);
1897 else
1899 gcc_assert (is_a <loop_vec_info> (vinfo));
1900 vectype = STMT_VINFO_VECTYPE (stmt_info);
1901 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1904 if (!vectype)
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "Build SLP failed: unsupported data-type %T\n",
1909 scalar_type);
1911 return false;
1913 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1915 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1916 scalar_stmts.create (group_size);
1917 stmt_vec_info next_info = stmt_info;
1918 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1920 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1921 while (next_info)
1923 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1924 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
1927 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1929 /* Collect the reduction stmts and store them in
1930 SLP_TREE_SCALAR_STMTS. */
1931 while (next_info)
1933 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1934 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
1936 /* Mark the first element of the reduction chain as reduction to properly
1937 transform the node. In the reduction analysis phase only the last
1938 element of the chain is marked as reduction. */
1939 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
1940 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
1941 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
1943 else
1945 /* Collect reduction statements. */
1946 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1947 for (i = 0; reductions.iterate (i, &next_info); i++)
1948 scalar_stmts.safe_push (next_info);
1951 /* Build the tree for the SLP instance. */
1952 bool *matches = XALLOCAVEC (bool, group_size);
1953 unsigned npermutes = 0;
1954 scalar_stmts_to_slp_tree_map_t *bst_map
1955 = new scalar_stmts_to_slp_tree_map_t ();
1956 poly_uint64 max_nunits = nunits;
1957 unsigned tree_size = 0;
1958 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1959 &max_nunits, matches, &npermutes,
1960 &tree_size, bst_map);
1961 /* The map keeps a reference on each SLP node built; release those now. */
1962 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
1963 it != bst_map->end (); ++it)
1964 if ((*it).second)
1965 vect_free_slp_tree ((*it).second, false);
1966 delete bst_map;
1967 if (node != NULL)
1969 /* Calculate the unrolling factor based on the smallest type. */
1970 poly_uint64 unrolling_factor
1971 = calculate_unrolling_factor (max_nunits, group_size);
1973 if (maybe_ne (unrolling_factor, 1U)
1974 && is_a <bb_vec_info> (vinfo))
1976 unsigned HOST_WIDE_INT const_max_nunits;
1977 if (!max_nunits.is_constant (&const_max_nunits)
1978 || const_max_nunits > group_size)
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1982 "Build SLP failed: store group "
1983 "size not a multiple of the vector size "
1984 "in basic block SLP\n");
1985 vect_free_slp_tree (node, false);
1986 return false;
1988 /* Fatal mismatch. */
1989 matches[group_size / const_max_nunits * const_max_nunits] = false;
1990 vect_free_slp_tree (node, false);
1992 else
1994 /* Create a new SLP instance. */
1995 new_instance = XNEW (struct _slp_instance);
1996 SLP_INSTANCE_TREE (new_instance) = node;
1997 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1998 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1999 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2000 vect_gather_slp_loads (new_instance, node);
2001 if (dump_enabled_p ())
2002 dump_printf_loc (MSG_NOTE, vect_location,
2003 "SLP size %u vs. limit %u.\n",
2004 tree_size, max_tree_size);
2006 /* Compute the load permutation. */
2007 slp_tree load_node;
2008 bool loads_permuted = false;
2009 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2011 vec<unsigned> load_permutation;
2012 int j;
2013 stmt_vec_info load_info;
2014 bool this_load_permuted = false;
2015 load_permutation.create (group_size);
2016 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2017 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2018 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2020 int load_place = vect_get_place_in_interleaving_chain
2021 (load_info, first_stmt_info);
2022 gcc_assert (load_place != -1);
2023 if (load_place != j)
2024 this_load_permuted = true;
2025 load_permutation.safe_push (load_place);
2027 if (!this_load_permuted
2028 /* The load requires permutation when unrolling exposes
2029 a gap either because the group is larger than the SLP
2030 group-size or because there is a gap between the groups. */
2031 && (known_eq (unrolling_factor, 1U)
2032 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2033 && DR_GROUP_GAP (first_stmt_info) == 0)))
2035 load_permutation.release ();
2036 continue;
2038 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2039 loads_permuted = true;
2042 if (loads_permuted)
2044 if (!vect_supported_load_permutation_p (new_instance))
2046 if (dump_enabled_p ())
2047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2048 "Build SLP failed: unsupported load "
2049 "permutation %G", stmt_info->stmt);
2050 vect_free_slp_instance (new_instance, false);
2051 return false;
2055 /* If the loads and stores can be handled with load/store-lanes
2056 instructions, do not generate this SLP instance. */
2057 if (is_a <loop_vec_info> (vinfo)
2058 && loads_permuted
2059 && dr && vect_store_lanes_supported (vectype, group_size, false))
2061 slp_tree load_node;
2062 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2064 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2065 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2066 /* Use SLP for strided accesses (or if we can't load-lanes). */
2067 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2068 || ! vect_load_lanes_supported
2069 (STMT_VINFO_VECTYPE (stmt_vinfo),
2070 DR_GROUP_SIZE (stmt_vinfo), false))
2071 break;
2073 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "Built SLP cancelled: can use "
2078 "load/store-lanes\n");
2079 vect_free_slp_instance (new_instance, false);
2080 return false;
2084 vinfo->slp_instances.safe_push (new_instance);
2086 if (dump_enabled_p ())
2088 dump_printf_loc (MSG_NOTE, vect_location,
2089 "Final SLP tree for instance:\n");
2090 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2093 return true;
2096 else
2098 /* Failed to SLP. */
2099 /* Free the allocated memory. */
2100 scalar_stmts.release ();
2103 /* For basic block SLP, try to break the group up into multiples of the
2104 vector size. */
2105 unsigned HOST_WIDE_INT const_nunits;
2106 if (is_a <bb_vec_info> (vinfo)
2107 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2108 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2109 && nunits.is_constant (&const_nunits))
2111 /* We consider breaking the group only on VF boundaries from the existing
2112 start. */
2113 for (i = 0; i < group_size; i++)
2114 if (!matches[i]) break;
2116 if (i >= const_nunits && i < group_size)
2118 /* Split into two groups at the first vector boundary before i. */
2119 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2120 unsigned group1_size = i & ~(const_nunits - 1);
2122 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2123 group1_size);
2124 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2125 max_tree_size);
2126 /* If the first non-match was in the middle of a vector,
2127 skip the rest of that vector. */
2128 if (group1_size < i)
2130 i = group1_size + const_nunits;
2131 if (i < group_size)
2132 rest = vect_split_slp_store_group (rest, const_nunits);
2134 if (i < group_size)
2135 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2136 return res;
2138 /* Even though the first vector did not all match, we might be able to SLP
2139 some of the remainder. FORNOW ignore this possibility. */
2142 return false;
2146 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2147 trees of packed scalar stmts if SLP is possible. */
2149 opt_result
2150 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2152 unsigned int i;
2153 stmt_vec_info first_element;
2155 DUMP_VECT_SCOPE ("vect_analyze_slp");
2157 /* Find SLP sequences starting from groups of grouped stores. */
2158 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2159 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2161 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2163 if (loop_vinfo->reduction_chains.length () > 0)
2165 /* Find SLP sequences starting from reduction chains. */
2166 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2167 if (! vect_analyze_slp_instance (vinfo, first_element,
2168 max_tree_size))
2170 /* Dissolve reduction chain group. */
2171 stmt_vec_info vinfo = first_element;
2172 while (vinfo)
2174 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2175 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2176 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2177 vinfo = next;
2179 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2183 /* Find SLP sequences starting from groups of reductions. */
2184 if (loop_vinfo->reductions.length () > 1)
2185 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2186 max_tree_size);
2189 return opt_result::success ();
2193 /* For each possible SLP instance decide whether to SLP it and calculate the
2194 overall unrolling factor needed to SLP the loop. Return TRUE if we decided to SLP at
2195 least one instance. */
2197 bool
2198 vect_make_slp_decision (loop_vec_info loop_vinfo)
2200 unsigned int i;
2201 poly_uint64 unrolling_factor = 1;
2202 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2203 slp_instance instance;
2204 int decided_to_slp = 0;
2206 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2208 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2210 /* FORNOW: SLP if you can. */
2211 /* All unroll factors have the form current_vector_size * X for some
2212 rational X, so they must have a common multiple. */
2213 unrolling_factor
2214 = force_common_multiple (unrolling_factor,
2215 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2217 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2218 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2219 loop-based vectorization. Such stmts will be marked as HYBRID. */
2220 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2221 decided_to_slp++;
2224 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2226 if (decided_to_slp && dump_enabled_p ())
2228 dump_printf_loc (MSG_NOTE, vect_location,
2229 "Decided to SLP %d instances. Unrolling factor ",
2230 decided_to_slp);
2231 dump_dec (MSG_NOTE, unrolling_factor);
2232 dump_printf (MSG_NOTE, "\n");
2235 return (decided_to_slp > 0);
2239 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2240 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
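/* Illustrative example: a pure-SLP statement whose scalar result also
   feeds a relevant non-SLP statement must be code-generated by both the
   SLP and the loop-based vectorizer, so it is marked hybrid and the mark
   is propagated down to the SLP statements it depends on. */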
2242 static void
2243 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2244 hash_map<slp_tree, unsigned> &visited)
2246 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2247 imm_use_iterator imm_iter;
2248 gimple *use_stmt;
2249 stmt_vec_info use_vinfo;
2250 slp_tree child;
2251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2252 int j;
2254 /* We need to union stype over the incoming graph edges but we still
2255 want to limit recursion to stay O(N+E). */
2256 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
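/* I.e., a node with three users is recursed into only on the third
   incoming edge; the earlier visits merely merge STYPE, which keeps
   the walk within the O(N+E) bound noted above. */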
2258 /* Propagate hybrid down the SLP tree. */
2259 if (stype == hybrid)
2261 else if (HYBRID_SLP_STMT (stmt_vinfo))
2262 stype = hybrid;
2263 else if (!only_edge)
2265 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2266 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2267 /* If we get a pattern stmt here we have to use the LHS of the
2268 original stmt for immediate uses. */
2269 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2270 tree def;
2271 if (gimple_code (stmt) == GIMPLE_PHI)
2272 def = gimple_phi_result (stmt);
2273 else
2274 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2275 if (def)
2276 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2278 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2279 if (!use_vinfo)
2280 continue;
2281 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2282 if (!STMT_SLP_TYPE (use_vinfo)
2283 && (STMT_VINFO_RELEVANT (use_vinfo)
2284 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2285 && !(gimple_code (use_stmt) == GIMPLE_PHI
2286 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2288 if (dump_enabled_p ())
2289 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2290 "def in non-SLP stmt: %G", use_stmt);
2291 stype = hybrid;
2296 if (stype == hybrid
2297 && !HYBRID_SLP_STMT (stmt_vinfo))
2299 if (dump_enabled_p ())
2300 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2301 stmt_vinfo->stmt);
2302 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2305 if (!only_edge)
2306 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2307 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2308 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2311 static void
2312 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2314 hash_map<slp_tree, unsigned> visited;
2315 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
2318 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2320 static tree
2321 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2323 walk_stmt_info *wi = (walk_stmt_info *)data;
2324 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2326 if (wi->is_lhs)
2327 return NULL_TREE;
2329 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2330 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2334 def_stmt_info->stmt);
2335 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2338 return NULL_TREE;
2341 static tree
2342 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2343 walk_stmt_info *wi)
2345 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2346 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2347 /* If the stmt is in a SLP instance then this isn't a reason
2348 to mark use definitions in other SLP instances as hybrid. */
2349 if (! STMT_SLP_TYPE (use_vinfo)
2350 && (STMT_VINFO_RELEVANT (use_vinfo)
2351 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2352 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2353 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2355 else
2356 *handled = true;
2357 return NULL_TREE;
2360 /* Find stmts that must be both vectorized and SLPed. */
2362 void
2363 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2365 unsigned int i;
2366 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2367 slp_instance instance;
2369 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2371 /* First walk all pattern stmts in the loop and mark the defs of their
2372 uses as hybrid, because immediate uses inside pattern stmts are not recorded. */
2373 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2375 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2376 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2377 gsi_next (&gsi))
2379 gimple *stmt = gsi_stmt (gsi);
2380 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2381 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2383 walk_stmt_info wi;
2384 memset (&wi, 0, sizeof (wi));
2385 wi.info = loop_vinfo;
2386 gimple_stmt_iterator gsi2
2387 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2388 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2389 vect_detect_hybrid_slp_1, &wi);
2390 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2391 vect_detect_hybrid_slp_2,
2392 vect_detect_hybrid_slp_1, &wi);
2397 /* Then walk the SLP instance trees marking stmts with uses in
2398 non-SLP stmts as hybrid, also propagating hybrid down the
2399 SLP tree, collecting the above info on-the-fly. */
2400 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2402 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2403 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2404 i, pure_slp);
2409 /* Initialize a bb_vec_info struct for the statements between
2410 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2412 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2413 gimple_stmt_iterator region_end_in,
2414 vec_info_shared *shared)
2415 : vec_info (vec_info::bb, init_cost (NULL), shared),
2416 bb (gsi_bb (region_begin_in)),
2417 region_begin (region_begin_in),
2418 region_end (region_end_in)
2420 gimple_stmt_iterator gsi;
2422 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2423 gsi_next (&gsi))
2425 gimple *stmt = gsi_stmt (gsi);
2426 gimple_set_uid (stmt, 0);
2427 add_stmt (stmt);
2430 bb->aux = this;
2434 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2435 stmts in the basic block. */
2437 _bb_vec_info::~_bb_vec_info ()
2439 for (gimple_stmt_iterator si = region_begin;
2440 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2441 /* Reset region marker. */
2442 gimple_set_uid (gsi_stmt (si), -1);
2444 bb->aux = NULL;
2447 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2448 given that child nodes have already been processed, and that
2449 their def types currently match their SLP node's def type. */
2451 static bool
2452 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2453 slp_instance node_instance,
2454 stmt_vector_for_cost *cost_vec)
2456 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2457 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2459 /* For BB vectorization vector types are assigned here.
2460 Memory accesses already got their vector type assigned
2461 in vect_analyze_data_refs. */
2462 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2463 if (bb_vinfo
2464 && ! STMT_VINFO_DATA_REF (stmt_info))
2466 tree vectype, nunits_vectype;
2467 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2468 &nunits_vectype))
2469 /* We checked this when building the node. */
2470 gcc_unreachable ();
2471 if (vectype == boolean_type_node)
2473 vectype = vect_get_mask_type_for_stmt (stmt_info);
2474 if (!vectype)
2475 /* vect_get_mask_type_for_stmt has already explained the
2476 failure. */
2477 return false;
2480 stmt_vec_info sstmt_info;
2481 unsigned int i;
2482 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2483 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2486 /* Calculate the number of vector statements to be created for the
2487 scalar stmts in this node. For SLP reductions it is equal to the
2488 number of vector statements in the children (which has already been
2489 calculated by the recursive call). Otherwise it is the number of
2490 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2491 VF divided by the number of elements in a vector. */
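/* E.g., with illustrative numbers: a group of DR_GROUP_SIZE 2 at VF 8
   with a 4-lane vector type needs 2 * 8 / 4 == 4 vector statements. */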
2492 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2493 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2494 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2495 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2496 else
2498 poly_uint64 vf;
2499 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2500 vf = loop_vinfo->vectorization_factor;
2501 else
2502 vf = 1;
2503 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2504 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2505 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2506 = vect_get_num_vectors (vf * group_size, vectype);
2509 bool dummy;
2510 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2513 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2514 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2516 Return true if the operations are supported. */
2518 static bool
2519 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2520 slp_instance node_instance,
2521 scalar_stmts_to_slp_tree_map_t *visited,
2522 scalar_stmts_to_slp_tree_map_t *lvisited,
2523 stmt_vector_for_cost *cost_vec)
2525 int i, j;
2526 slp_tree child;
2528 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2529 return true;
2531 /* If we already analyzed the exact same set of scalar stmts we're done.
2532 We share the generated vector stmts for those. */
2533 slp_tree *leader;
2534 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2535 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2537 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2538 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2539 return true;
2542 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2543 doesn't result in any issue since we throw away the lvisited set
2544 when we fail. */
2545 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2547 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2548 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2549 visited, lvisited, cost_vec))
2550 return false;
2552 /* ??? We have to catch late the case where the same first scalar stmt
2553 appears in multiple SLP children with a different def type, and fail. Remember
2554 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2555 match it when that is vect_internal_def. */
2556 auto_vec<vect_def_type, 4> dt;
2557 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2558 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2559 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2561 /* Push SLP node def-type to stmt operands. */
2562 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2563 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2564 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2565 = SLP_TREE_DEF_TYPE (child);
2567 /* Check everything worked out. */
2568 bool res = true;
2569 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2570 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2572 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2573 != SLP_TREE_DEF_TYPE (child))
2574 res = false;
2576 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
2577 res = false;
2578 if (!res && dump_enabled_p ())
2579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2580 "not vectorized: same operand with different "
2581 "def type in stmt.\n");
2583 if (res)
2584 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2585 cost_vec);
2587 /* Restore def-types. */
2588 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2589 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2591 return res;
2595 /* Analyze statements in SLP instances of VINFO. Return true if the
2596 operations are supported. */
2598 bool
2599 vect_slp_analyze_operations (vec_info *vinfo)
2601 slp_instance instance;
2602 int i;
2604 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2606 scalar_stmts_to_slp_tree_map_t *visited
2607 = new scalar_stmts_to_slp_tree_map_t ();
2608 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2610 scalar_stmts_to_slp_tree_map_t lvisited;
2611 stmt_vector_for_cost cost_vec;
2612 cost_vec.create (2);
2613 if (!vect_slp_analyze_node_operations (vinfo,
2614 SLP_INSTANCE_TREE (instance),
2615 instance, visited, &lvisited,
2616 &cost_vec))
2618 slp_tree node = SLP_INSTANCE_TREE (instance);
2619 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2620 if (dump_enabled_p ())
2621 dump_printf_loc (MSG_NOTE, vect_location,
2622 "removing SLP instance operations starting from: %G",
2623 stmt_info->stmt);
2624 vect_free_slp_instance (instance, false);
2625 vinfo->slp_instances.ordered_remove (i);
2626 cost_vec.release ();
2628 else
2630 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2631 x != lvisited.end(); ++x)
2632 visited->put ((*x).first.copy (), (*x).second);
2633 i++;
2635 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2636 cost_vec.release ();
2639 delete visited;
2641 return !vinfo->slp_instances.is_empty ();
2645 /* Compute the scalar cost of the SLP node NODE and its children
2646 and record it in COST_VEC. Do not account defs that are marked in
2647 LIFE, and update LIFE according to uses of NODE. */
2649 static void
2650 vect_bb_slp_scalar_cost (basic_block bb,
2651 slp_tree node, vec<bool, va_heap> *life,
2652 stmt_vector_for_cost *cost_vec,
2653 hash_set<slp_tree> &visited)
2655 unsigned i;
2656 stmt_vec_info stmt_info;
2657 slp_tree child;
2659 if (visited.add (node))
2660 return;
2662 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2664 gimple *stmt = stmt_info->stmt;
2665 vec_info *vinfo = stmt_info->vinfo;
2666 ssa_op_iter op_iter;
2667 def_operand_p def_p;
2669 if ((*life)[i])
2670 continue;
2672 /* If there is a non-vectorized use of the defs then the scalar
2673 stmt is kept live in which case we do not account it or any
2674 required defs in the SLP children in the scalar cost. This
2675 way we make the vectorization more costly when compared to
2676 the scalar cost. */
2677 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2679 imm_use_iterator use_iter;
2680 gimple *use_stmt;
2681 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2682 if (!is_gimple_debug (use_stmt))
2684 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2685 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2687 (*life)[i] = true;
2688 BREAK_FROM_IMM_USE_STMT (use_iter);
2692 if ((*life)[i])
2693 continue;
2695 /* Count scalar stmts only once. */
2696 if (gimple_visited_p (stmt))
2697 continue;
2698 gimple_set_visited (stmt, true);
2700 vect_cost_for_stmt kind;
2701 if (STMT_VINFO_DATA_REF (stmt_info))
2703 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2704 kind = scalar_load;
2705 else
2706 kind = scalar_store;
2708 else
2709 kind = scalar_stmt;
2710 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2713 auto_vec<bool, 20> subtree_life;
2714 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2716 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2718 /* Do not directly pass LIFE to the recursive call, copy it to
2719 confine changes in the callee to the current child/subtree. */
2720 subtree_life.safe_splice (*life);
2721 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2722 visited);
2723 subtree_life.truncate (0);
2728 static void
2729 vect_bb_slp_scalar_cost (basic_block bb,
2730 slp_tree node, vec<bool, va_heap> *life,
2731 stmt_vector_for_cost *cost_vec)
2733 hash_set<slp_tree> visited;
2734 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2737 /* Check if vectorization of the basic block is profitable. */
2739 static bool
2740 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2742 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2743 slp_instance instance;
2744 int i;
2745 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2746 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2748 /* Calculate scalar cost. */
2749 stmt_vector_for_cost scalar_costs;
2750 scalar_costs.create (0);
2751 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2753 auto_vec<bool, 20> life;
2754 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2755 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2756 SLP_INSTANCE_TREE (instance),
2757 &life, &scalar_costs);
2759 void *target_cost_data = init_cost (NULL);
2760 add_stmt_costs (target_cost_data, &scalar_costs);
2761 scalar_costs.release ();
2762 unsigned dummy;
2763 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2764 destroy_cost_data (target_cost_data);
2766 /* Unset visited flag. */
2767 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2768 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2769 gimple_set_visited (gsi_stmt (gsi), false);
2771 /* Complete the target-specific cost calculation. */
2772 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2773 &vec_inside_cost, &vec_epilogue_cost);
2775 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2777 if (dump_enabled_p ())
2779 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2780 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2781 vec_inside_cost);
2782 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2783 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2784 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2787 /* Vectorization is profitable if its cost is no more than the cost of the scalar
2788 version. Note that we err on the vector side for equal cost because
2789 the cost estimate is otherwise quite pessimistic (constant uses are
2790 free on the scalar side but cost a load on the vector side for
2791 example). */
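/* E.g., with illustrative numbers: vector inside cost 8, prologue cost 3
   and epilogue cost 0 against scalar cost 11 ties (8 + 3 == 11) and still
   vectorizes; a scalar cost of 10 would fail the check below. */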
2792 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2793 return false;
2795 return true;
2798 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2799 if so and sets fatal to true if failure is independent of
2800 current_vector_size. */
2802 static bb_vec_info
2803 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2804 gimple_stmt_iterator region_end,
2805 vec<data_reference_p> datarefs, int n_stmts,
2806 bool &fatal, vec_info_shared *shared)
2808 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2810 bb_vec_info bb_vinfo;
2811 slp_instance instance;
2812 int i;
2813 poly_uint64 min_vf = 2;
2815 /* The first group of checks is independent of the vector size. */
2816 fatal = true;
2818 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2822 "not vectorized: too many instructions in "
2823 "basic block.\n");
2824 free_data_refs (datarefs);
2825 return NULL;
2828 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2829 if (!bb_vinfo)
2830 return NULL;
2832 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2833 bb_vinfo->shared->save_datarefs ();
2835 /* Analyze the data references. */
2837 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2839 if (dump_enabled_p ())
2840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2841 "not vectorized: unhandled data-ref in basic "
2842 "block.\n");
2844 delete bb_vinfo;
2845 return NULL;
2848 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2852 "not vectorized: not enough data-refs in "
2853 "basic block.\n");
2855 delete bb_vinfo;
2856 return NULL;
2859 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2861 if (dump_enabled_p ())
2862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2863 "not vectorized: unhandled data access in "
2864 "basic block.\n");
2866 delete bb_vinfo;
2867 return NULL;
2870 /* If there are no grouped stores in the region there is no need
2871 to continue with pattern recog as vect_analyze_slp will fail
2872 anyway. */
2873 if (bb_vinfo->grouped_stores.is_empty ())
2875 if (dump_enabled_p ())
2876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2877 "not vectorized: no grouped stores in "
2878 "basic block.\n");
2880 delete bb_vinfo;
2881 return NULL;
2884 /* The rest of the analysis below may depend on the vector size. */
2885 fatal = false;
2887 vect_pattern_recog (bb_vinfo);
2889 /* Check the SLP opportunities in the basic block, analyze and build SLP
2890 trees. */
2891 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2893 if (dump_enabled_p ())
2895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2896 "Failed to SLP the basic block.\n");
2897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2898 "not vectorized: failed to find SLP opportunities "
2899 "in basic block.\n");
2902 delete bb_vinfo;
2903 return NULL;
2906 vect_record_base_alignments (bb_vinfo);
2908 /* Analyze and verify the alignment of data references and the
2909 dependence in the SLP instances. */
2910 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2912 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2913 || ! vect_slp_analyze_instance_dependence (instance))
2915 slp_tree node = SLP_INSTANCE_TREE (instance);
2916 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2917 if (dump_enabled_p ())
2918 dump_printf_loc (MSG_NOTE, vect_location,
2919 "removing SLP instance operations starting from: %G",
2920 stmt_info->stmt);
2921 vect_free_slp_instance (instance, false);
2922 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2923 continue;
2926 /* Mark all the statements that we want to vectorize as pure SLP and
2927 relevant. */
2928 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2929 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2931 i++;
2933 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2935 delete bb_vinfo;
2936 return NULL;
2939 if (!vect_slp_analyze_operations (bb_vinfo))
2941 if (dump_enabled_p ())
2942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2943 "not vectorized: bad operation in basic block.\n");
2945 delete bb_vinfo;
2946 return NULL;
2949 /* Cost model: check if the vectorization is worthwhile. */
2950 if (!unlimited_cost_model (NULL)
2951 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2953 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2955 "not vectorized: vectorization is not "
2956 "profitable.\n");
2958 delete bb_vinfo;
2959 return NULL;
2962 if (dump_enabled_p ())
2963 dump_printf_loc (MSG_NOTE, vect_location,
2964 "Basic block will be vectorized using SLP\n");
2966 return bb_vinfo;
2970 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2971 true if anything in the basic-block was vectorized. */
2973 bool
2974 vect_slp_bb (basic_block bb)
2976 bb_vec_info bb_vinfo;
2977 gimple_stmt_iterator gsi;
2978 bool any_vectorized = false;
2979 auto_vector_sizes vector_sizes;
2981 /* Autodetect the first vector size we try. */
2982 current_vector_size = 0;
2983 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2984 unsigned int next_size = 0;
2986 gsi = gsi_start_bb (bb);
2988 poly_uint64 autodetected_vector_size = 0;
2989 while (1)
2991 if (gsi_end_p (gsi))
2992 break;
2994 gimple_stmt_iterator region_begin = gsi;
2995 vec<data_reference_p> datarefs = vNULL;
2996 int insns = 0;
2998 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3000 gimple *stmt = gsi_stmt (gsi);
3001 if (is_gimple_debug (stmt))
3002 continue;
3003 insns++;
3005 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3006 vect_location = stmt;
3008 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3009 break;
3012 /* Skip leading unhandled stmts. */
3013 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3015 gsi_next (&gsi);
3016 continue;
3019 gimple_stmt_iterator region_end = gsi;
3021 bool vectorized = false;
3022 bool fatal = false;
3023 vec_info_shared shared;
3024 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3025 datarefs, insns, fatal, &shared);
3026 if (bb_vinfo
3027 && dbg_cnt (vect_slp))
3029 if (dump_enabled_p ())
3030 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3032 bb_vinfo->shared->check_datarefs ();
3033 vect_schedule_slp (bb_vinfo);
3035 unsigned HOST_WIDE_INT bytes;
3036 if (dump_enabled_p ())
3038 if (current_vector_size.is_constant (&bytes))
3039 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3040 "basic block part vectorized using %wu byte "
3041 "vectors\n", bytes);
3042 else
3043 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3044 "basic block part vectorized using variable "
3045 "length vectors\n");
3048 vectorized = true;
3050 delete bb_vinfo;
3052 any_vectorized |= vectorized;
3054 if (next_size == 0)
3055 autodetected_vector_size = current_vector_size;
3057 if (next_size < vector_sizes.length ()
3058 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3059 next_size += 1;
3061 if (vectorized
3062 || next_size == vector_sizes.length ()
3063 || known_eq (current_vector_size, 0U)
3064 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3065 vector sizes will fail do not bother iterating. */
3066 || fatal)
3068 if (gsi_end_p (region_end))
3069 break;
3071 /* Skip the unhandled stmt. */
3072 gsi_next (&gsi);
3074 /* And reset vector sizes. */
3075 current_vector_size = 0;
3076 next_size = 0;
3078 else
3080 /* Try the next biggest vector size. */
3081 current_vector_size = vector_sizes[next_size++];
3082 if (dump_enabled_p ())
3084 dump_printf_loc (MSG_NOTE, vect_location,
3085 "***** Re-trying analysis with "
3086 "vector size ");
3087 dump_dec (MSG_NOTE, current_vector_size);
3088 dump_printf (MSG_NOTE, "\n");
3091 /* Start over. */
3092 gsi = region_begin;
3096 return any_vectorized;
3100 /* Return true if the vector type for the boolean constant operand of
3101 STMT_VINFO is a boolean vector. */
3102 static bool
3103 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
3105 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3106 tree op, vectype;
3107 enum vect_def_type dt;
3109 /* For comparisons and COND_EXPRs the type is chosen depending
3110 on the other, non-constant comparison operand. */
3111 if (TREE_CODE_CLASS (code) == tcc_comparison)
3113 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3114 op = gimple_assign_rhs1 (stmt);
3116 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3117 gcc_unreachable ();
3119 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3122 if (code == COND_EXPR)
3124 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3125 tree cond = gimple_assign_rhs1 (stmt);
3127 if (TREE_CODE (cond) == SSA_NAME)
3128 op = cond;
3129 else
3130 op = TREE_OPERAND (cond, 0);
3132 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3133 gcc_unreachable ();
3135 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3138 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3141 /* Build a variable-length vector in which the elements in ELTS are repeated
3142 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3143 RESULTS and add any new instructions to SEQ.
3145 The approach we use is:
3147 (1) Find a vector mode VM with integer elements of mode IM.
3149 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3150 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3151 from small vectors to IM.
3153 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3155 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3156 correct byte contents.
3158 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3160 We try to find the largest IM for which this sequence works, in order
3161 to cut down on the number of interleaves. */
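/* A sketch of the idea with illustrative numbers: for ELTS = {a, b, c, d}
   of 16-bit elements and a 32-bit IM, NVECTORS is 2. Step (2)
   view-converts {a, b} and {c, d} into 32-bit values AB and CD, step (3)
   splats them into {AB, AB, ...} and {CD, CD, ...}, and a single
   interleave in step (4) produces {AB, CD, AB, CD, ...}, i.e.
   {a, b, c, d, a, b, c, d, ...}. */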
3163 void
3164 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3165 unsigned int nresults, vec<tree> &results)
3167 unsigned int nelts = elts.length ();
3168 tree element_type = TREE_TYPE (vector_type);
3170 /* (1) Find a vector mode VM with integer elements of mode IM. */
3171 unsigned int nvectors = 1;
3172 tree new_vector_type;
3173 tree permutes[2];
3174 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3175 &nvectors, &new_vector_type,
3176 permutes))
3177 gcc_unreachable ();
3179 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3180 unsigned int partial_nelts = nelts / nvectors;
3181 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3183 tree_vector_builder partial_elts;
3184 auto_vec<tree, 32> pieces (nvectors * 2);
3185 pieces.quick_grow (nvectors * 2);
3186 for (unsigned int i = 0; i < nvectors; ++i)
3188 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3189 ELTS' has mode IM. */
3190 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3191 for (unsigned int j = 0; j < partial_nelts; ++j)
3192 partial_elts.quick_push (elts[i * partial_nelts + j]);
3193 tree t = gimple_build_vector (seq, &partial_elts);
3194 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3195 TREE_TYPE (new_vector_type), t);
3197 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3198 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3201 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3202 correct byte contents.
3204 We need to repeat the following operation log2(nvectors) times:
3206 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3207 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3209 However, if each input repeats every N elements and the VF is
3210 a multiple of N * 2, the HI result is the same as the LO. */
3211 unsigned int in_start = 0;
3212 unsigned int out_start = nvectors;
3213 unsigned int hi_start = nvectors / 2;
3214 /* A bound on the number of outputs needed to produce NRESULTS results
3215 in the final iteration. */
3216 unsigned int noutputs_bound = nvectors * nresults;
3217 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3219 noutputs_bound /= 2;
3220 unsigned int limit = MIN (noutputs_bound, nvectors);
3221 for (unsigned int i = 0; i < limit; ++i)
3223 if ((i & 1) != 0
3224 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3225 2 * in_repeat))
3227 pieces[out_start + i] = pieces[out_start + i - 1];
3228 continue;
3231 tree output = make_ssa_name (new_vector_type);
3232 tree input1 = pieces[in_start + (i / 2)];
3233 tree input2 = pieces[in_start + (i / 2) + hi_start];
3234 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3235 input1, input2,
3236 permutes[i & 1]);
3237 gimple_seq_add_stmt (seq, stmt);
3238 pieces[out_start + i] = output;
3240 std::swap (in_start, out_start);
3243 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3244 results.reserve (nresults);
3245 for (unsigned int i = 0; i < nresults; ++i)
3246 if (i < nvectors)
3247 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3248 pieces[in_start + i]));
3249 else
3250 results.quick_push (results[i - nvectors]);
3254 /* For constant and loop invariant defs of SLP_NODE this function returns
3255 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3256 OP_NUM determines which operand of the RHS of the scalar stmts we gather
3257 defs for. NUMBER_OF_VECTORS is the number of vector defs to create. */
3261 static void
3262 vect_get_constant_vectors (tree op, slp_tree slp_node,
3263 vec<tree> *vec_oprnds,
3264 unsigned int op_num, unsigned int number_of_vectors)
3266 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3267 stmt_vec_info stmt_vinfo = stmts[0];
3268 gimple *stmt = stmt_vinfo->stmt;
3269 unsigned HOST_WIDE_INT nunits;
3270 tree vec_cst;
3271 unsigned j, number_of_places_left_in_vector;
3272 tree vector_type;
3273 tree vop;
3274 int group_size = stmts.length ();
3275 unsigned int vec_num, i;
3276 unsigned number_of_copies = 1;
3277 vec<tree> voprnds;
3278 voprnds.create (number_of_vectors);
3279 bool constant_p, is_store;
3280 tree neutral_op = NULL;
3281 enum tree_code code = gimple_expr_code (stmt);
3282 gimple_seq ctor_seq = NULL;
3283 auto_vec<tree, 16> permute_results;
3285 /* Check if vector type is a boolean vector. */
3286 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3287 && vect_mask_constant_operand_p (stmt_vinfo))
3288 vector_type
3289 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3290 else
3291 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3293 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3295 is_store = true;
3296 op = gimple_assign_rhs1 (stmt);
3298 else
3299 is_store = false;
3301 gcc_assert (op);
3303 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3304 created vectors. It is greater than 1 if unrolling is performed.
3306 For example, we have two scalar operands, s1 and s2 (e.g., group of
3307 strided accesses of size two), while NUNITS is four (i.e., four scalars
3308 of this type can be packed in a vector). The output vector will contain
3309 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3310 will be 2).
3312 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3313 containing the operands.
3315 For example, NUNITS is four as before, and the group size is 8
3316 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3317 {s5, s6, s7, s8}. */
3319 /* When using duplicate_and_interleave, we just need one element for
3320 each scalar statement. */
3321 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3322 nunits = group_size;
3324 number_of_copies = nunits * number_of_vectors / group_size;
3326 number_of_places_left_in_vector = nunits;
3327 constant_p = true;
3328 tree_vector_builder elts (vector_type, nunits, 1);
3329 elts.quick_grow (nunits);
3330 bool place_after_defs = false;
3331 for (j = 0; j < number_of_copies; j++)
3333 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3335 stmt = stmt_vinfo->stmt;
3336 if (is_store)
3337 op = gimple_assign_rhs1 (stmt);
3338 else
3340 switch (code)
3342 case COND_EXPR:
3344 tree cond = gimple_assign_rhs1 (stmt);
3345 if (TREE_CODE (cond) == SSA_NAME)
3346 op = gimple_op (stmt, op_num + 1);
3347 else if (op_num == 0 || op_num == 1)
3348 op = TREE_OPERAND (cond, op_num);
3349 else
3351 if (op_num == 2)
3352 op = gimple_assign_rhs2 (stmt);
3353 else
3354 op = gimple_assign_rhs3 (stmt);
3357 break;
3359 case CALL_EXPR:
3360 op = gimple_call_arg (stmt, op_num);
3361 break;
3363 case LSHIFT_EXPR:
3364 case RSHIFT_EXPR:
3365 case LROTATE_EXPR:
3366 case RROTATE_EXPR:
3367 op = gimple_op (stmt, op_num + 1);
3368 /* Unlike the other binary operators, shifts/rotates have
3369 the shift count being int, instead of the same type as
3370 the lhs, so make sure the scalar is the right type if
3371 we are dealing with vectors of
3372 long long/long/short/char. */
3373 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3374 op = fold_convert (TREE_TYPE (vector_type), op);
3375 break;
3377 default:
3378 op = gimple_op (stmt, op_num + 1);
3379 break;
3383 /* Create 'vect_ = {op0,op1,...,opn}'. */
3384 number_of_places_left_in_vector--;
3385 tree orig_op = op;
3386 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3388 if (CONSTANT_CLASS_P (op))
3390 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3392 /* Can't use VIEW_CONVERT_EXPR for booleans because
3393 of possibly different sizes of scalar value and
3394 vector element. */
3395 if (integer_zerop (op))
3396 op = build_int_cst (TREE_TYPE (vector_type), 0);
3397 else if (integer_onep (op))
3398 op = build_all_ones_cst (TREE_TYPE (vector_type));
3399 else
3400 gcc_unreachable ();
3402 else
3403 op = fold_unary (VIEW_CONVERT_EXPR,
3404 TREE_TYPE (vector_type), op);
3405 gcc_assert (op && CONSTANT_CLASS_P (op));
3407 else
3409 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3410 gimple *init_stmt;
3411 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3413 tree true_val
3414 = build_all_ones_cst (TREE_TYPE (vector_type));
3415 tree false_val
3416 = build_zero_cst (TREE_TYPE (vector_type));
3417 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3418 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3419 op, true_val,
3420 false_val);
3422 else
3424 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3425 op);
3426 init_stmt
3427 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3428 op);
3430 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3431 op = new_temp;
3434 elts[number_of_places_left_in_vector] = op;
3435 if (!CONSTANT_CLASS_P (op))
3436 constant_p = false;
3437 if (TREE_CODE (orig_op) == SSA_NAME
3438 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3439 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3440 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3441 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3442 place_after_defs = true;
3444 if (number_of_places_left_in_vector == 0)
3446 if (constant_p
3447 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3448 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3449 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3450 else
3452 if (vec_oprnds->is_empty ())
3453 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3454 number_of_vectors,
3455 permute_results);
3456 vec_cst = permute_results[number_of_vectors - j - 1];
3458 tree init;
3459 gimple_stmt_iterator gsi;
3460 if (place_after_defs)
3462 stmt_vec_info last_stmt_info
3463 = vect_find_last_scalar_stmt_in_slp (slp_node);
3464 gsi = gsi_for_stmt (last_stmt_info->stmt);
3465 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3466 &gsi);
3468 else
3469 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3470 NULL);
3471 if (ctor_seq != NULL)
3473 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3474 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3475 ctor_seq = NULL;
3477 voprnds.quick_push (init);
3478 place_after_defs = false;
3479 number_of_places_left_in_vector = nunits;
3480 constant_p = true;
3481 elts.new_vector (vector_type, nunits, 1);
3482 elts.quick_grow (nunits);
3487 /* Since the vectors are created in reverse order, put them back in the
3488 original order. */
3489 vec_num = voprnds.length ();
3490 for (j = vec_num; j != 0; j--)
3492 vop = voprnds[j - 1];
3493 vec_oprnds->quick_push (vop);
3496 voprnds.release ();
3498 /* If the VF is greater than the unrolling factor needed for the SLP
3499 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3500 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3501 to replicate the vectors. */
3502 while (number_of_vectors > vec_oprnds->length ())
3504 tree neutral_vec = NULL;
3506 if (neutral_op)
3508 if (!neutral_vec)
3509 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3511 vec_oprnds->quick_push (neutral_vec);
3513 else
3515 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3516 vec_oprnds->quick_push (vop);
3522 /* Get vectorized definitions from SLP_NODE that contains corresponding
3523 vectorized def-stmts. */
3525 static void
3526 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3528 tree vec_oprnd;
3529 stmt_vec_info vec_def_stmt_info;
3530 unsigned int i;
3532 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3534 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3536 gcc_assert (vec_def_stmt_info);
3537 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3538 vec_oprnd = gimple_phi_result (vec_def_phi);
3539 else
3540 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3541 vec_oprnds->quick_push (vec_oprnd);
3546 /* Get vectorized definitions for SLP_NODE.
3547 If the scalar definitions are loop invariants or constants, collect them and
3548 call vect_get_constant_vectors() to create vector stmts.
3549 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3550 must be stored in the corresponding child of SLP_NODE, and we call
3551 vect_get_slp_vect_defs () to retrieve them. */
3553 void
3554 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3555 vec<vec<tree> > *vec_oprnds)
3557 int number_of_vects = 0, i;
3558 unsigned int child_index = 0;
3559 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3560 slp_tree child = NULL;
3561 vec<tree> vec_defs;
3562 tree oprnd;
3563 bool vectorized_defs;
3565 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3566 FOR_EACH_VEC_ELT (ops, i, oprnd)
3568 /* For each operand we check if it has vectorized definitions in a child
3569 node or we need to create them (for invariants and constants). We
3570 check if the LHS of the first stmt of the next child matches OPRND.
3571 If it does, we found the correct child. Otherwise, we call
3572 vect_get_constant_vectors (), and do not advance CHILD_INDEX, in order
3573 to check this child node again for the next operand. */
3574 vectorized_defs = false;
3575 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3577 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3579 /* We have to check both pattern and original def, if available. */
3580 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3582 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3583 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3584 tree first_def_op;
3586 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3587 first_def_op = gimple_phi_result (first_def);
3588 else
3589 first_def_op = gimple_get_lhs (first_def_info->stmt);
3590 if (operand_equal_p (oprnd, first_def_op, 0)
3591 || (related
3592 && operand_equal_p (oprnd,
3593 gimple_get_lhs (related->stmt), 0)))
3595 /* The number of vector defs is determined by the number of
3596 vector statements in the node from which we get those
3597 statements. */
3598 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3599 vectorized_defs = true;
3600 child_index++;
3603 else
3604 child_index++;
3607 if (!vectorized_defs)
3609 if (i == 0)
3611 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3612 /* Number of vector stmts was calculated according to LHS in
3613 vect_schedule_slp_instance (), fix it by replacing LHS with
3614 RHS, if necessary. See vect_get_smallest_scalar_type () for
3615 details. */
3616 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3617 &rhs_size_unit);
3618 if (rhs_size_unit != lhs_size_unit)
3620 number_of_vects *= rhs_size_unit;
3621 number_of_vects /= lhs_size_unit;
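/* E.g., with illustrative types: for int = (int) char, LHS_SIZE_UNIT is 4
   and RHS_SIZE_UNIT is 1, so four int vectors worth of lanes fit into
   a single char vector: 4 * 1 / 4 == 1. */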
3626 /* Allocate memory for vectorized defs. */
3627 vec_defs = vNULL;
3628 vec_defs.create (number_of_vects);
3630 /* For reduction defs we call vect_get_constant_vectors (), since we are
3631 looking for initial loop invariant values. */
3632 if (vectorized_defs)
3633 /* The defs are already vectorized. */
3634 vect_get_slp_vect_defs (child, &vec_defs);
3635 else
3636 /* Build vectors from scalar defs. */
3637 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3638 number_of_vects);
3640 vec_oprnds->quick_push (vec_defs);
3644 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3645 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3646 permute statements for the SLP node NODE of the SLP instance
3647 SLP_NODE_INSTANCE. */
3649 bool
3650 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3651 gimple_stmt_iterator *gsi, poly_uint64 vf,
3652 slp_instance slp_node_instance, bool analyze_only,
3653 unsigned *n_perms)
3655 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3656 vec_info *vinfo = stmt_info->vinfo;
3657 int vec_index = 0;
3658 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3659 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3660 unsigned int mask_element;
3661 machine_mode mode;
3663 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3664 return false;
3666 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3668 mode = TYPE_MODE (vectype);
3669 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3671 /* Initialize the vect stmts of NODE to properly insert the generated
3672 stmts later. */
3673 if (! analyze_only)
3674 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3675 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3676 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3678 /* Generate permutation masks for every NODE. Number of masks for each NODE
3679 is equal to GROUP_SIZE.
3680 E.g., we have a group of three nodes with three loads from the same
3681 location in each node, and the vector size is 4. I.e., we have an
3682 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3683 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3684 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3687 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3688 The last mask is illegal since we assume two operands for permute
3689 operation, and the mask element values can't be outside that range.
3690 Hence, the last mask must be converted into {2,5,5,5}.
3691 For the first two permutations we need the first and the second input
3692 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3693 we need the second and the third vectors: {b1,c1,a2,b2} and
3694 {c2,a3,b3,c3}. */
3696 int vect_stmts_counter = 0;
3697 unsigned int index = 0;
3698 int first_vec_index = -1;
3699 int second_vec_index = -1;
3700 bool noop_p = true;
3701 *n_perms = 0;
3703 vec_perm_builder mask;
3704 unsigned int nelts_to_build;
3705 unsigned int nvectors_per_build;
3706 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3707 && multiple_p (nunits, group_size));
3708 if (repeating_p)
3710 /* A single vector contains a whole number of copies of the node, so:
3711 (a) all permutes can use the same mask; and
3712 (b) the permutes only need a single vector input. */
3713 mask.new_vector (nunits, group_size, 3);
3714 nelts_to_build = mask.encoded_nelts ();
3715 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3717 else
3719 /* We need to construct a separate mask for each vector statement. */
3720 unsigned HOST_WIDE_INT const_nunits, const_vf;
3721 if (!nunits.is_constant (&const_nunits)
3722 || !vf.is_constant (&const_vf))
3723 return false;
3724 mask.new_vector (const_nunits, const_nunits, 1);
3725 nelts_to_build = const_vf * group_size;
3726 nvectors_per_build = 1;
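
  /* In the a0b0c0... example above (illustrative values): const_nunits == 4,
     group_size == 3 and const_vf == 4, so NELTS_TO_BUILD == 12 and the loop
     below builds the three four-element masks quoted there.  */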
  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;

  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
                        + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
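      /* For the a's node in the example above (load permutation {0,0,0},
         DR_GROUP_SIZE 3) this yields I = 0,0,0,3,3,3,6,6,6,9,9,9, i.e.
         the scalar elements behind the masks quoted there (illustrative
         values).  */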
      if (repeating_p)
        {
          first_vec_index = 0;
          mask_element = i;
        }
      else
        {
          /* Enforced before the loop when !repeating_p.  */
          unsigned int const_nunits = nunits.to_constant ();
          vec_index = i / const_nunits;
          mask_element = i % const_nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += const_nunits;
            }
          else
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "permutation requires at "
                                 "least three vectors %G",
                                 stmt_info->stmt);
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element < 2 * const_nunits);
        }
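      /* In the two-vector case just handled, mask elements in
         [0, const_nunits) select lanes of the first input vector and
         elements in [const_nunits, 2 * const_nunits) lanes of the second;
         e.g. with const_nunits == 4, element 6 in the {3,3,6,6} mask above
         denotes lane 2 of the second input (illustrative).  */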

      if (mask_element != index)
        noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
        {
          indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
          if (!can_vec_perm_const_p (mode, indices))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported vect permute { ");
                  for (unsigned int i = 0; i < count; ++i)
                    {
                      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
                    }
                  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                }
              gcc_assert (analyze_only);
              return false;
            }

          ++*n_perms;
        }

      if (index == count)
        {
          if (!analyze_only)
            {
              tree mask_vec = NULL_TREE;

              if (! noop_p)
                mask_vec = vect_gen_perm_mask_checked (vectype, indices);

              if (second_vec_index == -1)
                second_vec_index = first_vec_index;

              for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
                {
                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index + ri];
                  tree second_vec = dr_chain[second_vec_index + ri];
                  stmt_vec_info perm_stmt_info;
                  if (! noop_p)
                    {
                      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      gassign *perm_stmt
                        = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                               first_vec, second_vec,
                                               mask_vec);
                      perm_stmt_info
                        = vect_finish_stmt_generation (stmt_info, perm_stmt,
                                                       gsi);
                    }
                  else
                    /* If MASK_VEC is NULL_TREE, generate the requested
                       identity transform.  */
                    perm_stmt_info = vinfo->lookup_def (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
                    = perm_stmt_info;
                }
            }

          index = 0;
          first_vec_index = -1;
          second_vec_index = -1;
          noop_p = true;
        }
    }

  return true;
}
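
/* Note: vectorizable_load calls vect_transform_slp_perm_load twice, first
   during analysis with ANALYZE_ONLY true to validate the permutations and
   obtain N_PERMS for costing, then during the transform to emit the
   VEC_PERM_EXPR statements.  */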

/* Vectorize the SLP instance tree rooted at NODE in postorder.  */

static void
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if (SLP_TREE_VEC_STMTS (node).exists ())
    return;

  /* See if we have already vectorized the same set of stmts and can reuse
     their vectorized stmts across instances.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push the SLP node def-type to the stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }
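  /* The def-type is pushed because vect_transform_stmt consults
     STMT_VINFO_DEF_TYPE of the operand definitions; it is restored to
     vect_internal_def after the transform below.  */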

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "------>vectorizing SLP node starting from: %G",
                     stmt_info->stmt);

  /* Vectorized stmts go before the last scalar stmt, which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
        {
          gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
          if (gimple_assign_rhs_code (ostmt) != code0)
            {
              mask.quick_push (1);
              ocode = gimple_assign_rhs_code (ostmt);
            }
          else
            mask.quick_push (0);
        }
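      /* E.g., for the statement group {+, -, +, -} CODE0 is PLUS_EXPR,
         OCODE ends up MINUS_EXPR and MASK is {0, 1, 0, 1} (illustrative
         values).  */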
      if (ocode != ERROR_MARK)
        {
          vec<stmt_vec_info> v0;
          vec<stmt_vec_info> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt_info, &si, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt_info, &si, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
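          /* The VEC_PERM_EXPR selector below is built with integer
             elements as wide as the elements of VECTYPE, e.g. a V4SI
             mask when VECTYPE is V4SF (illustrative).  */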
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects
                 variable-length vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();
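              /* E.g., with const_nunits == 4 and MASK {0, 1, 0, 1}, TMASK
                 is {0, 5, 2, 7}: even lanes come from the CODE0 result,
                 odd lanes from the OCODE result (illustrative values).  */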

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets'
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]->stmt),
                                           gimple_assign_lhs (v1[j]->stmt),
                                           tmask);
              SLP_TREE_VEC_STMTS (node).quick_push
                (vect_finish_stmt_generation (stmt_info, vstmt, &si));
            }
          v0.release ();
          v1.release ();
          return;
        }
    }
  vect_transform_stmt (stmt_info, &si, node, instance);

  /* Restore the stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
        stmt_vec_info child_stmt_info;
        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
          STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }
}

/* Replace the scalar calls in SLP node NODE by assignments setting their
   lhs to zero.  For loop vectorization this is done in vectorizable_call,
   but for SLP it needs to be deferred until the end of vect_schedule_slp,
   because multiple SLP instances may refer to the same scalar stmt.  */
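
/* E.g. (illustrative), a vectorized scalar call lhs_1 = __builtin_powf
   (a_2, b_3) is rewritten to lhs_1 = 0.0f here; the now-dead assignment
   is removed by later cleanup.  */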

static void
vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
        continue;
      if (is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (node, visited);
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                  instance, bst_map);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (store_info))
            break;

          store_info = vect_orig_stmt (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);