PR libstdc++/87308 adjust regex used in std::any pretty printer
[official-gcc.git] / gcc / tree-vect-slp.c
blob9e805d07726c8dd0028d94fde7029ed4087de262
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "params.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
39 #include "cfgloop.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
43 #include "dbgcnt.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
54 static void
55 vect_free_slp_tree (slp_tree node, bool final_p)
57 int i;
58 slp_tree child;
60 if (--node->refcnt != 0)
61 return;
63 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
64 vect_free_slp_tree (child, final_p);
66 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
67 Some statements might no longer exist, after having been
68 removed by vect_transform_stmt. Updating the remaining
69 statements would be redundant. */
70 if (!final_p)
72 stmt_vec_info stmt_info;
73 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
75 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
76 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
80 SLP_TREE_CHILDREN (node).release ();
81 SLP_TREE_SCALAR_STMTS (node).release ();
82 SLP_TREE_VEC_STMTS (node).release ();
83 SLP_TREE_LOAD_PERMUTATION (node).release ();
85 free (node);
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
92 void
93 vect_free_slp_instance (slp_instance instance, bool final_p)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
96 SLP_INSTANCE_LOADS (instance).release ();
97 free (instance);
101 /* Create an SLP node for SCALAR_STMTS. */
103 static slp_tree
104 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
106 slp_tree node;
107 stmt_vec_info stmt_info = scalar_stmts[0];
108 unsigned int nops;
110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
111 nops = gimple_call_num_args (stmt);
112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
118 else if (is_a <gphi *> (stmt_info->stmt))
119 nops = 0;
120 else
121 return NULL;
123 node = XNEW (struct _slp_tree);
124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
125 SLP_TREE_VEC_STMTS (node).create (0);
126 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
127 SLP_TREE_CHILDREN (node).create (nops);
128 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
129 SLP_TREE_TWO_OPERATORS (node) = false;
130 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
131 node->refcnt = 1;
133 unsigned i;
134 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
135 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
137 return node;
141 /* This structure is used in creation of an SLP tree. Each instance
142 corresponds to the same operand in a group of scalar stmts in an SLP
143 node. */
144 typedef struct _slp_oprnd_info
146 /* Def-stmts for the operands. */
147 vec<stmt_vec_info> def_stmts;
148 /* Information about the first statement, its vector def-type, type, the
149 operand itself in case it's constant, and an indication if it's a pattern
150 stmt. */
151 tree first_op_type;
152 enum vect_def_type first_dt;
153 bool first_pattern;
154 bool second_pattern;
155 } *slp_oprnd_info;
158 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
159 operand. */
160 static vec<slp_oprnd_info>
161 vect_create_oprnd_info (int nops, int group_size)
163 int i;
164 slp_oprnd_info oprnd_info;
165 vec<slp_oprnd_info> oprnds_info;
167 oprnds_info.create (nops);
168 for (i = 0; i < nops; i++)
170 oprnd_info = XNEW (struct _slp_oprnd_info);
171 oprnd_info->def_stmts.create (group_size);
172 oprnd_info->first_dt = vect_uninitialized_def;
173 oprnd_info->first_op_type = NULL_TREE;
174 oprnd_info->first_pattern = false;
175 oprnd_info->second_pattern = false;
176 oprnds_info.quick_push (oprnd_info);
179 return oprnds_info;
183 /* Free operands info. */
185 static void
186 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
188 int i;
189 slp_oprnd_info oprnd_info;
191 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
193 oprnd_info->def_stmts.release ();
194 XDELETE (oprnd_info);
197 oprnds_info.release ();
201 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
202 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
203 of the chain. */
206 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
207 stmt_vec_info first_stmt_info)
209 stmt_vec_info next_stmt_info = first_stmt_info;
210 int result = 0;
212 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
213 return -1;
217 if (next_stmt_info == stmt_info)
218 return result;
219 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
220 if (next_stmt_info)
221 result += DR_GROUP_GAP (next_stmt_info);
223 while (next_stmt_info);
225 return -1;
228 /* Check whether it is possible to load COUNT elements of type ELT_MODE
229 using the method implemented by duplicate_and_interleave. Return true
230 if so, returning the number of intermediate vectors in *NVECTORS_OUT
231 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
232 (if nonnull). */
234 bool
235 can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
236 unsigned int *nvectors_out,
237 tree *vector_type_out,
238 tree *permutes)
240 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
241 poly_int64 nelts;
242 unsigned int nvectors = 1;
243 for (;;)
245 scalar_int_mode int_mode;
246 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
247 if (multiple_p (current_vector_size, elt_bytes, &nelts)
248 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
250 tree int_type = build_nonstandard_integer_type
251 (GET_MODE_BITSIZE (int_mode), 1);
252 tree vector_type = build_vector_type (int_type, nelts);
253 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
255 vec_perm_builder sel1 (nelts, 2, 3);
256 vec_perm_builder sel2 (nelts, 2, 3);
257 poly_int64 half_nelts = exact_div (nelts, 2);
258 for (unsigned int i = 0; i < 3; ++i)
260 sel1.quick_push (i);
261 sel1.quick_push (i + nelts);
262 sel2.quick_push (half_nelts + i);
263 sel2.quick_push (half_nelts + i + nelts);
265 vec_perm_indices indices1 (sel1, 2, nelts);
266 vec_perm_indices indices2 (sel2, 2, nelts);
267 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
268 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
270 if (nvectors_out)
271 *nvectors_out = nvectors;
272 if (vector_type_out)
273 *vector_type_out = vector_type;
274 if (permutes)
276 permutes[0] = vect_gen_perm_mask_checked (vector_type,
277 indices1);
278 permutes[1] = vect_gen_perm_mask_checked (vector_type,
279 indices2);
281 return true;
285 if (!multiple_p (elt_bytes, 2, &elt_bytes))
286 return false;
287 nvectors *= 2;
291 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
292 they are of a valid type and that they match the defs of the first stmt of
293 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
294 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
295 indicates swap is required for cond_expr stmts. Specifically, *SWAP
296 is 1 if STMT is cond and operands of comparison need to be swapped;
297 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
298 If there is any operand swap in this function, *SWAP is set to non-zero
299 value.
300 If there was a fatal error return -1; if the error could be corrected by
301 swapping operands of father node of this one, return 1; if everything is
302 ok return 0. */
303 static int
304 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
305 vec<stmt_vec_info> stmts, unsigned stmt_num,
306 vec<slp_oprnd_info> *oprnds_info)
308 stmt_vec_info stmt_info = stmts[stmt_num];
309 tree oprnd;
310 unsigned int i, number_of_oprnds;
311 enum vect_def_type dt = vect_uninitialized_def;
312 bool pattern = false;
313 slp_oprnd_info oprnd_info;
314 int first_op_idx = 1;
315 unsigned int commutative_op = -1U;
316 bool first_op_cond = false;
317 bool first = stmt_num == 0;
318 bool second = stmt_num == 1;
320 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
322 number_of_oprnds = gimple_call_num_args (stmt);
323 first_op_idx = 3;
324 if (gimple_call_internal_p (stmt))
326 internal_fn ifn = gimple_call_internal_fn (stmt);
327 commutative_op = first_commutative_argument (ifn);
330 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
332 enum tree_code code = gimple_assign_rhs_code (stmt);
333 number_of_oprnds = gimple_num_ops (stmt) - 1;
334 /* Swap can only be done for cond_expr if asked to, otherwise we
335 could result in different comparison code to the first stmt. */
336 if (gimple_assign_rhs_code (stmt) == COND_EXPR
337 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
339 first_op_cond = true;
340 number_of_oprnds++;
342 else
343 commutative_op = commutative_tree_code (code) ? 0U : -1U;
345 else
346 return -1;
348 bool swapped = (*swap != 0);
349 gcc_assert (!swapped || first_op_cond);
350 for (i = 0; i < number_of_oprnds; i++)
352 again:
353 if (first_op_cond)
355 /* Map indicating how operands of cond_expr should be swapped. */
356 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
357 int *map = maps[*swap];
359 if (i < 2)
360 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
361 first_op_idx), map[i]);
362 else
363 oprnd = gimple_op (stmt_info->stmt, map[i]);
365 else
366 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
368 oprnd_info = (*oprnds_info)[i];
370 stmt_vec_info def_stmt_info;
371 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
373 if (dump_enabled_p ())
374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
375 "Build SLP failed: can't analyze def for %T\n",
376 oprnd);
378 return -1;
381 if (second)
382 oprnd_info->second_pattern = pattern;
384 if (first)
386 oprnd_info->first_dt = dt;
387 oprnd_info->first_pattern = pattern;
388 oprnd_info->first_op_type = TREE_TYPE (oprnd);
390 else
392 /* Not first stmt of the group, check that the def-stmt/s match
393 the def-stmt/s of the first stmt. Allow different definition
394 types for reduction chains: the first stmt must be a
395 vect_reduction_def (a phi node), and the rest
396 vect_internal_def. */
397 tree type = TREE_TYPE (oprnd);
398 if ((oprnd_info->first_dt != dt
399 && !(oprnd_info->first_dt == vect_reduction_def
400 && dt == vect_internal_def)
401 && !((oprnd_info->first_dt == vect_external_def
402 || oprnd_info->first_dt == vect_constant_def)
403 && (dt == vect_external_def
404 || dt == vect_constant_def)))
405 || !types_compatible_p (oprnd_info->first_op_type, type))
407 /* Try swapping operands if we got a mismatch. */
408 if (i == commutative_op && !swapped)
410 swapped = true;
411 goto again;
414 if (dump_enabled_p ())
415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
416 "Build SLP failed: different types\n");
418 return 1;
420 if ((dt == vect_constant_def
421 || dt == vect_external_def)
422 && !current_vector_size.is_constant ()
423 && (TREE_CODE (type) == BOOLEAN_TYPE
424 || !can_duplicate_and_interleave_p (stmts.length (),
425 TYPE_MODE (type))))
427 if (dump_enabled_p ())
428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
429 "Build SLP failed: invalid type of def "
430 "for variable-length SLP %T\n", oprnd);
431 return -1;
435 /* Check the types of the definitions. */
436 switch (dt)
438 case vect_constant_def:
439 case vect_external_def:
440 break;
442 case vect_reduction_def:
443 case vect_induction_def:
444 case vect_internal_def:
445 oprnd_info->def_stmts.quick_push (def_stmt_info);
446 break;
448 default:
449 /* FORNOW: Not supported. */
450 if (dump_enabled_p ())
451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
452 "Build SLP failed: illegal type of def %T\n",
453 oprnd);
455 return -1;
459 /* Swap operands. */
460 if (swapped)
462 /* If there are already uses of this stmt in a SLP instance then
463 we've committed to the operand order and can't swap it. */
464 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
466 if (dump_enabled_p ())
467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
468 "Build SLP failed: cannot swap operands of "
469 "shared stmt %G", stmt_info->stmt);
470 return -1;
473 if (first_op_cond)
475 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
476 tree cond = gimple_assign_rhs1 (stmt);
477 enum tree_code code = TREE_CODE (cond);
479 /* Swap. */
480 if (*swap == 1)
482 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
483 &TREE_OPERAND (cond, 1));
484 TREE_SET_CODE (cond, swap_tree_comparison (code));
486 /* Invert. */
487 else
489 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
490 gimple_assign_rhs3_ptr (stmt));
491 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
492 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
493 gcc_assert (code != ERROR_MARK);
494 TREE_SET_CODE (cond, code);
497 else
499 unsigned int op = commutative_op + first_op_idx;
500 swap_ssa_operands (stmt_info->stmt,
501 gimple_op_ptr (stmt_info->stmt, op),
502 gimple_op_ptr (stmt_info->stmt, op + 1));
504 if (dump_enabled_p ())
505 dump_printf_loc (MSG_NOTE, vect_location,
506 "swapped operands to match def types in %G",
507 stmt_info->stmt);
510 *swap = swapped;
511 return 0;
514 /* Return true if call statements CALL1 and CALL2 are similar enough
515 to be combined into the same SLP group. */
517 static bool
518 compatible_calls_p (gcall *call1, gcall *call2)
520 unsigned int nargs = gimple_call_num_args (call1);
521 if (nargs != gimple_call_num_args (call2))
522 return false;
524 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
525 return false;
527 if (gimple_call_internal_p (call1))
529 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
530 TREE_TYPE (gimple_call_lhs (call2))))
531 return false;
532 for (unsigned int i = 0; i < nargs; ++i)
533 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
534 TREE_TYPE (gimple_call_arg (call2, i))))
535 return false;
537 else
539 if (!operand_equal_p (gimple_call_fn (call1),
540 gimple_call_fn (call2), 0))
541 return false;
543 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
544 return false;
546 return true;
549 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
550 caller's attempt to find the vector type in STMT_INFO with the narrowest
551 element type. Return true if VECTYPE is nonnull and if it is valid
552 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
553 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
554 vect_build_slp_tree. */
556 static bool
557 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
558 tree vectype, poly_uint64 *max_nunits)
560 if (!vectype)
562 if (dump_enabled_p ())
563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
564 "Build SLP failed: unsupported data-type in %G\n",
565 stmt_info->stmt);
566 /* Fatal mismatch. */
567 return false;
570 /* If populating the vector type requires unrolling then fail
571 before adjusting *max_nunits for basic-block vectorization. */
572 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
573 unsigned HOST_WIDE_INT const_nunits;
574 if (STMT_VINFO_BB_VINFO (stmt_info)
575 && (!nunits.is_constant (&const_nunits)
576 || const_nunits > group_size))
578 if (dump_enabled_p ())
579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
580 "Build SLP failed: unrolling required "
581 "in basic block SLP\n");
582 /* Fatal mismatch. */
583 return false;
586 /* In case of multiple types we need to detect the smallest type. */
587 vect_update_max_nunits (max_nunits, vectype);
588 return true;
591 /* STMTS is a group of GROUP_SIZE SLP statements in which some
592 statements do the same operation as the first statement and in which
593 the others do ALT_STMT_CODE. Return true if we can take one vector
594 of the first operation and one vector of the second and permute them
595 to get the required result. VECTYPE is the type of the vector that
596 would be permuted. */
598 static bool
599 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
600 unsigned int group_size, tree vectype,
601 tree_code alt_stmt_code)
603 unsigned HOST_WIDE_INT count;
604 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
605 return false;
607 vec_perm_builder sel (count, count, 1);
608 for (unsigned int i = 0; i < count; ++i)
610 unsigned int elt = i;
611 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
612 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
613 elt += count;
614 sel.quick_push (elt);
616 vec_perm_indices indices (sel, 2, count);
617 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
620 /* Verify if the scalar stmts STMTS are isomorphic, require data
621 permutation or are of unsupported types of operation. Return
622 true if they are, otherwise return false and indicate in *MATCHES
623 which stmts are not isomorphic to the first one. If MATCHES[0]
624 is false then this indicates the comparison could not be
625 carried out or the stmts will never be vectorized by SLP.
627 Note COND_EXPR is possibly ismorphic to another one after swapping its
628 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
629 the first stmt by swapping the two operands of comparison; set SWAP[i]
630 to 2 if stmt I is isormorphic to the first stmt by inverting the code
631 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
632 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
634 static bool
635 vect_build_slp_tree_1 (unsigned char *swap,
636 vec<stmt_vec_info> stmts, unsigned int group_size,
637 poly_uint64 *max_nunits, bool *matches,
638 bool *two_operators)
640 unsigned int i;
641 stmt_vec_info first_stmt_info = stmts[0];
642 enum tree_code first_stmt_code = ERROR_MARK;
643 enum tree_code alt_stmt_code = ERROR_MARK;
644 enum tree_code rhs_code = ERROR_MARK;
645 enum tree_code first_cond_code = ERROR_MARK;
646 tree lhs;
647 bool need_same_oprnds = false;
648 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
649 optab optab;
650 int icode;
651 machine_mode optab_op2_mode;
652 machine_mode vec_mode;
653 stmt_vec_info first_load = NULL, prev_first_load = NULL;
655 /* For every stmt in NODE find its def stmt/s. */
656 stmt_vec_info stmt_info;
657 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
659 gimple *stmt = stmt_info->stmt;
660 swap[i] = 0;
661 matches[i] = false;
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
666 /* Fail to vectorize statements marked as unvectorizable. */
667 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
669 if (dump_enabled_p ())
670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
671 "Build SLP failed: unvectorizable statement %G",
672 stmt);
673 /* Fatal mismatch. */
674 matches[0] = false;
675 return false;
678 lhs = gimple_get_lhs (stmt);
679 if (lhs == NULL_TREE)
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
683 "Build SLP failed: not GIMPLE_ASSIGN nor "
684 "GIMPLE_CALL %G", stmt);
685 /* Fatal mismatch. */
686 matches[0] = false;
687 return false;
690 tree nunits_vectype;
691 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
692 &nunits_vectype)
693 || (nunits_vectype
694 && !vect_record_max_nunits (stmt_info, group_size,
695 nunits_vectype, max_nunits)))
697 /* Fatal mismatch. */
698 matches[0] = false;
699 return false;
702 gcc_assert (vectype);
704 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
706 rhs_code = CALL_EXPR;
707 if ((gimple_call_internal_p (call_stmt)
708 && (!vectorizable_internal_fn_p
709 (gimple_call_internal_fn (call_stmt))))
710 || gimple_call_tail_p (call_stmt)
711 || gimple_call_noreturn_p (call_stmt)
712 || !gimple_call_nothrow_p (call_stmt)
713 || gimple_call_chain (call_stmt))
715 if (dump_enabled_p ())
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
717 "Build SLP failed: unsupported call type %G",
718 call_stmt);
719 /* Fatal mismatch. */
720 matches[0] = false;
721 return false;
724 else
725 rhs_code = gimple_assign_rhs_code (stmt);
727 /* Check the operation. */
728 if (i == 0)
730 first_stmt_code = rhs_code;
732 /* Shift arguments should be equal in all the packed stmts for a
733 vector shift with scalar shift operand. */
734 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
735 || rhs_code == LROTATE_EXPR
736 || rhs_code == RROTATE_EXPR)
738 if (vectype == boolean_type_node)
740 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "Build SLP failed: shift of a"
743 " boolean.\n");
744 /* Fatal mismatch. */
745 matches[0] = false;
746 return false;
749 vec_mode = TYPE_MODE (vectype);
751 /* First see if we have a vector/vector shift. */
752 optab = optab_for_tree_code (rhs_code, vectype,
753 optab_vector);
755 if (!optab
756 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
758 /* No vector/vector shift, try for a vector/scalar shift. */
759 optab = optab_for_tree_code (rhs_code, vectype,
760 optab_scalar);
762 if (!optab)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766 "Build SLP failed: no optab.\n");
767 /* Fatal mismatch. */
768 matches[0] = false;
769 return false;
771 icode = (int) optab_handler (optab, vec_mode);
772 if (icode == CODE_FOR_nothing)
774 if (dump_enabled_p ())
775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
776 "Build SLP failed: "
777 "op not supported by target.\n");
778 /* Fatal mismatch. */
779 matches[0] = false;
780 return false;
782 optab_op2_mode = insn_data[icode].operand[2].mode;
783 if (!VECTOR_MODE_P (optab_op2_mode))
785 need_same_oprnds = true;
786 first_op1 = gimple_assign_rhs2 (stmt);
790 else if (rhs_code == WIDEN_LSHIFT_EXPR)
792 need_same_oprnds = true;
793 first_op1 = gimple_assign_rhs2 (stmt);
796 else
798 if (first_stmt_code != rhs_code
799 && alt_stmt_code == ERROR_MARK)
800 alt_stmt_code = rhs_code;
801 if (first_stmt_code != rhs_code
802 && (first_stmt_code != IMAGPART_EXPR
803 || rhs_code != REALPART_EXPR)
804 && (first_stmt_code != REALPART_EXPR
805 || rhs_code != IMAGPART_EXPR)
806 /* Handle mismatches in plus/minus by computing both
807 and merging the results. */
808 && !((first_stmt_code == PLUS_EXPR
809 || first_stmt_code == MINUS_EXPR)
810 && (alt_stmt_code == PLUS_EXPR
811 || alt_stmt_code == MINUS_EXPR)
812 && rhs_code == alt_stmt_code)
813 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
814 && (first_stmt_code == ARRAY_REF
815 || first_stmt_code == BIT_FIELD_REF
816 || first_stmt_code == INDIRECT_REF
817 || first_stmt_code == COMPONENT_REF
818 || first_stmt_code == MEM_REF)))
820 if (dump_enabled_p ())
822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
823 "Build SLP failed: different operation "
824 "in stmt %G", stmt);
825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
826 "original stmt %G", first_stmt_info->stmt);
828 /* Mismatch. */
829 continue;
832 if (need_same_oprnds
833 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
835 if (dump_enabled_p ())
836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
837 "Build SLP failed: different shift "
838 "arguments in %G", stmt);
839 /* Mismatch. */
840 continue;
843 if (rhs_code == CALL_EXPR)
845 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
846 as_a <gcall *> (stmt)))
848 if (dump_enabled_p ())
849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
850 "Build SLP failed: different calls in %G",
851 stmt);
852 /* Mismatch. */
853 continue;
858 /* Grouped store or load. */
859 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
861 if (REFERENCE_CLASS_P (lhs))
863 /* Store. */
866 else
868 /* Load. */
869 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
870 if (prev_first_load)
872 /* Check that there are no loads from different interleaving
873 chains in the same node. */
874 if (prev_first_load != first_load)
876 if (dump_enabled_p ())
877 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
878 vect_location,
879 "Build SLP failed: different "
880 "interleaving chains in one node %G",
881 stmt);
882 /* Mismatch. */
883 continue;
886 else
887 prev_first_load = first_load;
889 } /* Grouped access. */
890 else
892 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
894 /* Not grouped load. */
895 if (dump_enabled_p ())
896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
897 "Build SLP failed: not grouped load %G", stmt);
899 /* FORNOW: Not grouped loads are not supported. */
900 /* Fatal mismatch. */
901 matches[0] = false;
902 return false;
905 /* Not memory operation. */
906 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
907 && TREE_CODE_CLASS (rhs_code) != tcc_unary
908 && TREE_CODE_CLASS (rhs_code) != tcc_expression
909 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
910 && rhs_code != CALL_EXPR)
912 if (dump_enabled_p ())
913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
914 "Build SLP failed: operation unsupported %G",
915 stmt);
916 /* Fatal mismatch. */
917 matches[0] = false;
918 return false;
921 if (rhs_code == COND_EXPR)
923 tree cond_expr = gimple_assign_rhs1 (stmt);
924 enum tree_code cond_code = TREE_CODE (cond_expr);
925 enum tree_code swap_code = ERROR_MARK;
926 enum tree_code invert_code = ERROR_MARK;
928 if (i == 0)
929 first_cond_code = TREE_CODE (cond_expr);
930 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
932 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
933 swap_code = swap_tree_comparison (cond_code);
934 invert_code = invert_tree_comparison (cond_code, honor_nans);
937 if (first_cond_code == cond_code)
939 /* Isomorphic can be achieved by swapping. */
940 else if (first_cond_code == swap_code)
941 swap[i] = 1;
942 /* Isomorphic can be achieved by inverting. */
943 else if (first_cond_code == invert_code)
944 swap[i] = 2;
945 else
947 if (dump_enabled_p ())
948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
949 "Build SLP failed: different"
950 " operation %G", stmt);
951 /* Mismatch. */
952 continue;
957 matches[i] = true;
960 for (i = 0; i < group_size; ++i)
961 if (!matches[i])
962 return false;
964 /* If we allowed a two-operation SLP node verify the target can cope
965 with the permute we are going to use. */
966 if (alt_stmt_code != ERROR_MARK
967 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
969 if (vectype == boolean_type_node
970 || !vect_two_operations_perm_ok_p (stmts, group_size,
971 vectype, alt_stmt_code))
973 for (i = 0; i < group_size; ++i)
974 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
976 matches[i] = false;
977 if (dump_enabled_p ())
979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
980 "Build SLP failed: different operation "
981 "in stmt %G", stmts[i]->stmt);
982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
983 "original stmt %G", first_stmt_info->stmt);
986 return false;
988 *two_operators = true;
991 return true;
994 /* Traits for the hash_set to record failed SLP builds for a stmt set.
995 Note we never remove apart from at destruction time so we do not
996 need a special value for deleted that differs from empty. */
997 struct bst_traits
999 typedef vec <stmt_vec_info> value_type;
1000 typedef vec <stmt_vec_info> compare_type;
1001 static inline hashval_t hash (value_type);
1002 static inline bool equal (value_type existing, value_type candidate);
1003 static inline bool is_empty (value_type x) { return !x.exists (); }
1004 static inline bool is_deleted (value_type x) { return !x.exists (); }
1005 static inline void mark_empty (value_type &x) { x.release (); }
1006 static inline void mark_deleted (value_type &x) { x.release (); }
1007 static inline void remove (value_type &x) { x.release (); }
1009 inline hashval_t
1010 bst_traits::hash (value_type x)
1012 inchash::hash h;
1013 for (unsigned i = 0; i < x.length (); ++i)
1014 h.add_int (gimple_uid (x[i]->stmt));
1015 return h.end ();
1017 inline bool
1018 bst_traits::equal (value_type existing, value_type candidate)
1020 if (existing.length () != candidate.length ())
1021 return false;
1022 for (unsigned i = 0; i < existing.length (); ++i)
1023 if (existing[i] != candidate[i])
1024 return false;
1025 return true;
1028 typedef hash_map <vec <gimple *>, slp_tree,
1029 simple_hashmap_traits <bst_traits, slp_tree> >
1030 scalar_stmts_to_slp_tree_map_t;
1032 static slp_tree
1033 vect_build_slp_tree_2 (vec_info *vinfo,
1034 vec<stmt_vec_info> stmts, unsigned int group_size,
1035 poly_uint64 *max_nunits,
1036 bool *matches, unsigned *npermutes, unsigned *tree_size,
1037 unsigned max_tree_size,
1038 scalar_stmts_to_slp_tree_map_t *bst_map);
1040 static slp_tree
1041 vect_build_slp_tree (vec_info *vinfo,
1042 vec<stmt_vec_info> stmts, unsigned int group_size,
1043 poly_uint64 *max_nunits,
1044 bool *matches, unsigned *npermutes, unsigned *tree_size,
1045 unsigned max_tree_size,
1046 scalar_stmts_to_slp_tree_map_t *bst_map)
1048 if (slp_tree *leader = bst_map->get (stmts))
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1052 *leader ? "" : "failed ", *leader);
1053 if (*leader)
1054 (*leader)->refcnt++;
1055 return *leader;
1057 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1058 matches, npermutes, tree_size,
1059 max_tree_size, bst_map);
1060 /* Keep a reference for the bst_map use. */
1061 if (res)
1062 res->refcnt++;
1063 bst_map->put (stmts.copy (), res);
1064 return res;
1067 /* Recursively build an SLP tree starting from NODE.
1068 Fail (and return a value not equal to zero) if def-stmts are not
1069 isomorphic, require data permutation or are of unsupported types of
1070 operation. Otherwise, return 0.
1071 The value returned is the depth in the SLP tree where a mismatch
1072 was found. */
1074 static slp_tree
1075 vect_build_slp_tree_2 (vec_info *vinfo,
1076 vec<stmt_vec_info> stmts, unsigned int group_size,
1077 poly_uint64 *max_nunits,
1078 bool *matches, unsigned *npermutes, unsigned *tree_size,
1079 unsigned max_tree_size,
1080 scalar_stmts_to_slp_tree_map_t *bst_map)
1082 unsigned nops, i, this_tree_size = 0;
1083 poly_uint64 this_max_nunits = *max_nunits;
1084 slp_tree node;
1086 matches[0] = false;
1088 stmt_vec_info stmt_info = stmts[0];
1089 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1090 nops = gimple_call_num_args (stmt);
1091 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1093 nops = gimple_num_ops (stmt) - 1;
1094 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1095 nops++;
1097 else if (is_a <gphi *> (stmt_info->stmt))
1098 nops = 0;
1099 else
1100 return NULL;
1102 /* If the SLP node is a PHI (induction or reduction), terminate
1103 the recursion. */
1104 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1106 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1107 tree vectype = get_vectype_for_scalar_type (scalar_type);
1108 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1109 return NULL;
1111 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1112 /* Induction from different IVs is not supported. */
1113 if (def_type == vect_induction_def)
1115 stmt_vec_info other_info;
1116 FOR_EACH_VEC_ELT (stmts, i, other_info)
1117 if (stmt_info != other_info)
1118 return NULL;
1120 else if (def_type == vect_reduction_def
1121 || def_type == vect_double_reduction_def
1122 || def_type == vect_nested_cycle)
1124 /* Else def types have to match. */
1125 stmt_vec_info other_info;
1126 FOR_EACH_VEC_ELT (stmts, i, other_info)
1128 /* But for reduction chains only check on the first stmt. */
1129 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1130 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
1131 continue;
1132 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1133 return NULL;
1136 else
1137 return NULL;
1138 node = vect_create_new_slp_node (stmts);
1139 return node;
1143 bool two_operators = false;
1144 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1145 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1146 &this_max_nunits, matches, &two_operators))
1147 return NULL;
1149 /* If the SLP node is a load, terminate the recursion. */
1150 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1151 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1153 *max_nunits = this_max_nunits;
1154 node = vect_create_new_slp_node (stmts);
1155 return node;
1158 /* Get at the operands, verifying they are compatible. */
1159 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1160 slp_oprnd_info oprnd_info;
1161 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1163 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1164 stmts, i, &oprnds_info);
1165 if (res != 0)
1166 matches[(res == -1) ? 0 : i] = false;
1167 if (!matches[0])
1168 break;
1170 for (i = 0; i < group_size; ++i)
1171 if (!matches[i])
1173 vect_free_oprnd_info (oprnds_info);
1174 return NULL;
1177 auto_vec<slp_tree, 4> children;
1179 stmt_info = stmts[0];
1181 if (tree_size)
1182 max_tree_size -= *tree_size;
1184 /* Create SLP_TREE nodes for the definition node/s. */
1185 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1187 slp_tree child;
1188 unsigned old_tree_size = this_tree_size;
1189 unsigned int j;
1191 if (oprnd_info->first_dt != vect_internal_def
1192 && oprnd_info->first_dt != vect_reduction_def
1193 && oprnd_info->first_dt != vect_induction_def)
1194 continue;
1196 if (++this_tree_size > max_tree_size)
1198 if (dump_enabled_p ())
1199 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1200 vect_location,
1201 "Build SLP failed: SLP tree too large\n");
1202 FOR_EACH_VEC_ELT (children, j, child)
1203 vect_free_slp_tree (child, false);
1204 vect_free_oprnd_info (oprnds_info);
1205 return NULL;
1208 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1209 group_size, &this_max_nunits,
1210 matches, npermutes,
1211 &this_tree_size,
1212 max_tree_size, bst_map)) != NULL)
1214 /* If we have all children of child built up from scalars then just
1215 throw that away and build it up this node from scalars. */
1216 if (!SLP_TREE_CHILDREN (child).is_empty ()
1217 /* ??? Rejecting patterns this way doesn't work. We'd have to
1218 do extra work to cancel the pattern so the uses see the
1219 scalar version. */
1220 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1222 slp_tree grandchild;
1224 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1225 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1226 break;
1227 if (!grandchild)
1229 /* Roll back. */
1230 this_tree_size = old_tree_size;
1231 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1232 vect_free_slp_tree (grandchild, false);
1233 SLP_TREE_CHILDREN (child).truncate (0);
1235 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "Building parent vector operands from "
1238 "scalars instead\n");
1239 oprnd_info->def_stmts = vNULL;
1240 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1241 children.safe_push (child);
1242 continue;
1246 oprnd_info->def_stmts = vNULL;
1247 children.safe_push (child);
1248 continue;
1251 /* If the SLP build failed fatally and we analyze a basic-block
1252 simply treat nodes we fail to build as externally defined
1253 (and thus build vectors from the scalar defs).
1254 The cost model will reject outright expensive cases.
1255 ??? This doesn't treat cases where permutation ultimatively
1256 fails (or we don't try permutation below). Ideally we'd
1257 even compute a permutation that will end up with the maximum
1258 SLP tree size... */
1259 if (is_a <bb_vec_info> (vinfo)
1260 && !matches[0]
1261 /* ??? Rejecting patterns this way doesn't work. We'd have to
1262 do extra work to cancel the pattern so the uses see the
1263 scalar version. */
1264 && !is_pattern_stmt_p (stmt_info))
1266 if (dump_enabled_p ())
1267 dump_printf_loc (MSG_NOTE, vect_location,
1268 "Building vector operands from scalars\n");
1269 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1270 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1271 children.safe_push (child);
1272 oprnd_info->def_stmts = vNULL;
1273 continue;
1276 /* If the SLP build for operand zero failed and operand zero
1277 and one can be commutated try that for the scalar stmts
1278 that failed the match. */
1279 if (i == 0
1280 /* A first scalar stmt mismatch signals a fatal mismatch. */
1281 && matches[0]
1282 /* ??? For COND_EXPRs we can swap the comparison operands
1283 as well as the arms under some constraints. */
1284 && nops == 2
1285 && oprnds_info[1]->first_dt == vect_internal_def
1286 && is_gimple_assign (stmt_info->stmt)
1287 /* Do so only if the number of not successful permutes was nor more
1288 than a cut-ff as re-trying the recursive match on
1289 possibly each level of the tree would expose exponential
1290 behavior. */
1291 && *npermutes < 4)
1293 /* See whether we can swap the matching or the non-matching
1294 stmt operands. */
1295 bool swap_not_matching = true;
1298 for (j = 0; j < group_size; ++j)
1300 if (matches[j] != !swap_not_matching)
1301 continue;
1302 stmt_vec_info stmt_info = stmts[j];
1303 /* Verify if we can swap operands of this stmt. */
1304 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1305 if (!stmt
1306 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1308 if (!swap_not_matching)
1309 goto fail;
1310 swap_not_matching = false;
1311 break;
1313 /* Verify if we can safely swap or if we committed to a
1314 specific operand order already.
1315 ??? Instead of modifying GIMPLE stmts here we could
1316 record whether we want to swap operands in the SLP
1317 node and temporarily do that when processing it
1318 (or wrap operand accessors in a helper). */
1319 else if (swap[j] != 0
1320 || STMT_VINFO_NUM_SLP_USES (stmt_info))
1322 if (!swap_not_matching)
1324 if (dump_enabled_p ())
1325 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1326 vect_location,
1327 "Build SLP failed: cannot swap "
1328 "operands of shared stmt %G",
1329 stmts[j]->stmt);
1330 goto fail;
1332 swap_not_matching = false;
1333 break;
1337 while (j != group_size);
1339 /* Swap mismatched definition stmts. */
1340 if (dump_enabled_p ())
1341 dump_printf_loc (MSG_NOTE, vect_location,
1342 "Re-trying with swapped operands of stmts ");
1343 for (j = 0; j < group_size; ++j)
1344 if (matches[j] == !swap_not_matching)
1346 std::swap (oprnds_info[0]->def_stmts[j],
1347 oprnds_info[1]->def_stmts[j]);
1348 if (dump_enabled_p ())
1349 dump_printf (MSG_NOTE, "%d ", j);
1351 if (dump_enabled_p ())
1352 dump_printf (MSG_NOTE, "\n");
1353 /* And try again with scratch 'matches' ... */
1354 bool *tem = XALLOCAVEC (bool, group_size);
1355 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1356 group_size, &this_max_nunits,
1357 tem, npermutes,
1358 &this_tree_size,
1359 max_tree_size, bst_map)) != NULL)
1361 /* ... so if successful we can apply the operand swapping
1362 to the GIMPLE IL. This is necessary because for example
1363 vect_get_slp_defs uses operand indexes and thus expects
1364 canonical operand order. This is also necessary even
1365 if we end up building the operand from scalars as
1366 we'll continue to process swapped operand two. */
1367 for (j = 0; j < group_size; ++j)
1368 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
1369 for (j = 0; j < group_size; ++j)
1370 if (matches[j] == !swap_not_matching)
1372 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1373 /* Avoid swapping operands twice. */
1374 if (gimple_plf (stmt, GF_PLF_1))
1375 continue;
1376 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1377 gimple_assign_rhs2_ptr (stmt));
1378 gimple_set_plf (stmt, GF_PLF_1, true);
1380 /* Verify we swap all duplicates or none. */
1381 if (flag_checking)
1382 for (j = 0; j < group_size; ++j)
1383 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1384 == (matches[j] == !swap_not_matching));
1386 /* If we have all children of child built up from scalars then
1387 just throw that away and build it up this node from scalars. */
1388 if (!SLP_TREE_CHILDREN (child).is_empty ()
1389 /* ??? Rejecting patterns this way doesn't work. We'd have
1390 to do extra work to cancel the pattern so the uses see the
1391 scalar version. */
1392 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1394 unsigned int j;
1395 slp_tree grandchild;
1397 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1398 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1399 break;
1400 if (!grandchild)
1402 /* Roll back. */
1403 this_tree_size = old_tree_size;
1404 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1405 vect_free_slp_tree (grandchild, false);
1406 SLP_TREE_CHILDREN (child).truncate (0);
1408 if (dump_enabled_p ())
1409 dump_printf_loc (MSG_NOTE, vect_location,
1410 "Building parent vector operands from "
1411 "scalars instead\n");
1412 oprnd_info->def_stmts = vNULL;
1413 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1414 children.safe_push (child);
1415 continue;
1419 oprnd_info->def_stmts = vNULL;
1420 children.safe_push (child);
1421 continue;
1424 ++*npermutes;
1427 fail:
1428 gcc_assert (child == NULL);
1429 FOR_EACH_VEC_ELT (children, j, child)
1430 vect_free_slp_tree (child, false);
1431 vect_free_oprnd_info (oprnds_info);
1432 return NULL;
1435 vect_free_oprnd_info (oprnds_info);
1437 if (tree_size)
1438 *tree_size += this_tree_size;
1439 *max_nunits = this_max_nunits;
1441 node = vect_create_new_slp_node (stmts);
1442 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1443 SLP_TREE_CHILDREN (node).splice (children);
1444 return node;
1447 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1449 static void
1450 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1451 slp_tree node, hash_set<slp_tree> &visited)
1453 int i;
1454 stmt_vec_info stmt_info;
1455 slp_tree child;
1457 if (visited.add (node))
1458 return;
1460 dump_printf_loc (dump_kind, loc, "node%s %p\n",
1461 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1462 ? " (external)" : "", node);
1463 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1464 dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
1465 if (SLP_TREE_CHILDREN (node).is_empty ())
1466 return;
1467 dump_printf_loc (dump_kind, loc, "\tchildren");
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1469 dump_printf (dump_kind, " %p", (void *)child);
1470 dump_printf (dump_kind, "\n");
1471 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1472 vect_print_slp_tree (dump_kind, loc, child, visited);
1475 static void
1476 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1477 slp_tree node)
1479 hash_set<slp_tree> visited;
1480 vect_print_slp_tree (dump_kind, loc, node, visited);
1483 /* Mark the tree rooted at NODE with PURE_SLP. */
1485 static void
1486 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1488 int i;
1489 stmt_vec_info stmt_info;
1490 slp_tree child;
1492 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1493 return;
1495 if (visited.add (node))
1496 return;
1498 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1499 STMT_SLP_TYPE (stmt_info) = pure_slp;
1501 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1502 vect_mark_slp_stmts (child, visited);
1505 static void
1506 vect_mark_slp_stmts (slp_tree node)
1508 hash_set<slp_tree> visited;
1509 vect_mark_slp_stmts (node, visited);
1512 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1514 static void
1515 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1517 int i;
1518 stmt_vec_info stmt_info;
1519 slp_tree child;
1521 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1522 return;
1524 if (visited.add (node))
1525 return;
1527 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1529 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1530 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1531 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1534 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1535 vect_mark_slp_stmts_relevant (child, visited);
1538 static void
1539 vect_mark_slp_stmts_relevant (slp_tree node)
1541 hash_set<slp_tree> visited;
1542 vect_mark_slp_stmts_relevant (node, visited);
1546 /* Rearrange the statements of NODE according to PERMUTATION. */
1548 static void
1549 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1550 vec<unsigned> permutation,
1551 hash_set<slp_tree> &visited)
1553 stmt_vec_info stmt_info;
1554 vec<stmt_vec_info> tmp_stmts;
1555 unsigned int i;
1556 slp_tree child;
1558 if (visited.add (node))
1559 return;
1561 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1562 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1564 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1565 tmp_stmts.create (group_size);
1566 tmp_stmts.quick_grow_cleared (group_size);
1568 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1569 tmp_stmts[permutation[i]] = stmt_info;
1571 SLP_TREE_SCALAR_STMTS (node).release ();
1572 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1576 /* Attempt to reorder stmts in a reduction chain so that we don't
1577 require any load permutation. Return true if that was possible,
1578 otherwise return false. */
1580 static bool
1581 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1583 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1584 unsigned int i, j;
1585 unsigned int lidx;
1586 slp_tree node, load;
1588 /* Compare all the permutation sequences to the first one. We know
1589 that at least one load is permuted. */
1590 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1591 if (!node->load_permutation.exists ())
1592 return false;
1593 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1595 if (!load->load_permutation.exists ())
1596 return false;
1597 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1598 if (lidx != node->load_permutation[j])
1599 return false;
1602 /* Check that the loads in the first sequence are different and there
1603 are no gaps between them. */
1604 auto_sbitmap load_index (group_size);
1605 bitmap_clear (load_index);
1606 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1608 if (lidx >= group_size)
1609 return false;
1610 if (bitmap_bit_p (load_index, lidx))
1611 return false;
1613 bitmap_set_bit (load_index, lidx);
1615 for (i = 0; i < group_size; i++)
1616 if (!bitmap_bit_p (load_index, i))
1617 return false;
1619 /* This permutation is valid for reduction. Since the order of the
1620 statements in the nodes is not important unless they are memory
1621 accesses, we can rearrange the statements in all the nodes
1622 according to the order of the loads. */
1623 hash_set<slp_tree> visited;
1624 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1625 node->load_permutation, visited);
1627 /* We are done, no actual permutations need to be generated. */
1628 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1629 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1631 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1632 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1633 /* But we have to keep those permutations that are required because
1634 of handling of gaps. */
1635 if (known_eq (unrolling_factor, 1U)
1636 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1637 && DR_GROUP_GAP (first_stmt_info) == 0))
1638 SLP_TREE_LOAD_PERMUTATION (node).release ();
1639 else
1640 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1641 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1644 return true;
1647 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1649 static void
1650 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1651 hash_set<slp_tree> &visited)
1653 if (visited.add (node))
1654 return;
1656 if (SLP_TREE_CHILDREN (node).length () == 0)
1658 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1659 if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
1660 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
1661 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1662 SLP_INSTANCE_LOADS (inst).safe_push (node);
1664 else
1666 unsigned i;
1667 slp_tree child;
1668 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1669 vect_gather_slp_loads (inst, child, visited);
1673 static void
1674 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1676 hash_set<slp_tree> visited;
1677 vect_gather_slp_loads (inst, node, visited);
1680 /* Check if the required load permutations in the SLP instance
1681 SLP_INSTN are supported. */
1683 static bool
1684 vect_supported_load_permutation_p (slp_instance slp_instn)
1686 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1687 unsigned int i, j, k, next;
1688 slp_tree node;
1690 if (dump_enabled_p ())
1692 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1693 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1694 if (node->load_permutation.exists ())
1695 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1696 dump_printf (MSG_NOTE, "%d ", next);
1697 else
1698 for (k = 0; k < group_size; ++k)
1699 dump_printf (MSG_NOTE, "%d ", k);
1700 dump_printf (MSG_NOTE, "\n");
1703 /* In case of reduction every load permutation is allowed, since the order
1704 of the reduction statements is not important (as opposed to the case of
1705 grouped stores). The only condition we need to check is that all the
1706 load nodes are of the same size and have the same permutation (and then
1707 rearrange all the nodes of the SLP instance according to this
1708 permutation). */
1710 /* Check that all the load nodes are of the same size. */
1711 /* ??? Can't we assert this? */
1712 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1713 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1714 return false;
1716 node = SLP_INSTANCE_TREE (slp_instn);
1717 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1719 /* Reduction (there are no data-refs in the root).
1720 In reduction chain the order of the loads is not important. */
1721 if (!STMT_VINFO_DATA_REF (stmt_info)
1722 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1723 vect_attempt_slp_rearrange_stmts (slp_instn);
1725 /* In basic block vectorization we allow any subchain of an interleaving
1726 chain.
1727 FORNOW: not supported in loop SLP because of realignment compications. */
1728 if (STMT_VINFO_BB_VINFO (stmt_info))
1730 /* Check whether the loads in an instance form a subchain and thus
1731 no permutation is necessary. */
1732 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1734 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1735 continue;
1736 bool subchain_p = true;
1737 stmt_vec_info next_load_info = NULL;
1738 stmt_vec_info load_info;
1739 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1741 if (j != 0
1742 && (next_load_info != load_info
1743 || DR_GROUP_GAP (load_info) != 1))
1745 subchain_p = false;
1746 break;
1748 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1750 if (subchain_p)
1751 SLP_TREE_LOAD_PERMUTATION (node).release ();
1752 else
1754 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1755 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1756 unsigned HOST_WIDE_INT nunits;
1757 unsigned k, maxk = 0;
1758 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1759 if (k > maxk)
1760 maxk = k;
1761 /* In BB vectorization we may not actually use a loaded vector
1762 accessing elements in excess of DR_GROUP_SIZE. */
1763 tree vectype = STMT_VINFO_VECTYPE (group_info);
1764 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1765 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1767 if (dump_enabled_p ())
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "BB vectorization with gaps at the end of "
1770 "a load is not supported\n");
1771 return false;
1774 /* Verify the permutation can be generated. */
1775 vec<tree> tem;
1776 unsigned n_perms;
1777 if (!vect_transform_slp_perm_load (node, tem, NULL,
1778 1, slp_instn, true, &n_perms))
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1782 vect_location,
1783 "unsupported load permutation\n");
1784 return false;
1788 return true;
1791 /* For loop vectorization verify we can generate the permutation. Be
1792 conservative about the vectorization factor, there are permutations
1793 that will use three vector inputs only starting from a specific factor
1794 and the vectorization factor is not yet final.
1795 ??? The SLP instance unrolling factor might not be the maximum one. */
1796 unsigned n_perms;
1797 poly_uint64 test_vf
1798 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1799 LOOP_VINFO_VECT_FACTOR
1800 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1801 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1802 if (node->load_permutation.exists ()
1803 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1804 slp_instn, true, &n_perms))
1805 return false;
1807 return true;
1811 /* Find the last store in SLP INSTANCE. */
1813 stmt_vec_info
1814 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1816 stmt_vec_info last = NULL;
1817 stmt_vec_info stmt_vinfo;
1819 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1821 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
1822 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1825 return last;
1828 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1829 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1830 (also containing the first GROUP1_SIZE stmts, since stores are
1831 consecutive), the second containing the remainder.
1832 Return the first stmt in the second group. */
1834 static stmt_vec_info
1835 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1837 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1838 gcc_assert (group1_size > 0);
1839 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1840 gcc_assert (group2_size > 0);
1841 DR_GROUP_SIZE (first_vinfo) = group1_size;
1843 stmt_vec_info stmt_info = first_vinfo;
1844 for (unsigned i = group1_size; i > 1; i--)
1846 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1847 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1849 /* STMT is now the last element of the first group. */
1850 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1851 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1853 DR_GROUP_SIZE (group2) = group2_size;
1854 for (stmt_info = group2; stmt_info;
1855 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1857 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1858 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1861 /* For the second group, the DR_GROUP_GAP is that before the original group,
1862 plus skipping over the first vector. */
1863 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1865 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1866 DR_GROUP_GAP (first_vinfo) += group2_size;
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1870 group1_size, group2_size);
1872 return group2;
1875 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1876 statements and a vector of NUNITS elements. */
1878 static poly_uint64
1879 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1881 return exact_div (common_multiple (nunits, group_size), group_size);
1884 /* Analyze an SLP instance starting from a group of grouped stores. Call
1885 vect_build_slp_tree to build a tree of packed stmts if possible.
1886 Return FALSE if it's impossible to SLP any stmt in the loop. */
1888 static bool
1889 vect_analyze_slp_instance (vec_info *vinfo,
1890 stmt_vec_info stmt_info, unsigned max_tree_size)
1892 slp_instance new_instance;
1893 slp_tree node;
1894 unsigned int group_size;
1895 tree vectype, scalar_type = NULL_TREE;
1896 unsigned int i;
1897 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1898 vec<stmt_vec_info> scalar_stmts;
1900 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1902 scalar_type = TREE_TYPE (DR_REF (dr));
1903 vectype = get_vectype_for_scalar_type (scalar_type);
1904 group_size = DR_GROUP_SIZE (stmt_info);
1906 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1908 gcc_assert (is_a <loop_vec_info> (vinfo));
1909 vectype = STMT_VINFO_VECTYPE (stmt_info);
1910 group_size = REDUC_GROUP_SIZE (stmt_info);
1912 else
1914 gcc_assert (is_a <loop_vec_info> (vinfo));
1915 vectype = STMT_VINFO_VECTYPE (stmt_info);
1916 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1919 if (!vectype)
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1923 "Build SLP failed: unsupported data-type %T\n",
1924 scalar_type);
1926 return false;
1928 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1930 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1931 scalar_stmts.create (group_size);
1932 stmt_vec_info next_info = stmt_info;
1933 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1935 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1936 while (next_info)
1938 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1939 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
1942 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1944 /* Collect the reduction stmts and store them in
1945 SLP_TREE_SCALAR_STMTS. */
1946 while (next_info)
1948 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1949 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
1951 /* Mark the first element of the reduction chain as reduction to properly
1952 transform the node. In the reduction analysis phase only the last
1953 element of the chain is marked as reduction. */
1954 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
1956 else
1958 /* Collect reduction statements. */
1959 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1960 for (i = 0; reductions.iterate (i, &next_info); i++)
1961 scalar_stmts.safe_push (next_info);
1964 /* Build the tree for the SLP instance. */
1965 bool *matches = XALLOCAVEC (bool, group_size);
1966 unsigned npermutes = 0;
1967 scalar_stmts_to_slp_tree_map_t *bst_map
1968 = new scalar_stmts_to_slp_tree_map_t ();
1969 poly_uint64 max_nunits = nunits;
1970 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1971 &max_nunits, matches, &npermutes,
1972 NULL, max_tree_size, bst_map);
1973 /* The map keeps a reference on SLP nodes built, release that. */
1974 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
1975 it != bst_map->end (); ++it)
1976 if ((*it).second)
1977 vect_free_slp_tree ((*it).second, false);
1978 delete bst_map;
1979 if (node != NULL)
1981 /* Calculate the unrolling factor based on the smallest type. */
1982 poly_uint64 unrolling_factor
1983 = calculate_unrolling_factor (max_nunits, group_size);
1985 if (maybe_ne (unrolling_factor, 1U)
1986 && is_a <bb_vec_info> (vinfo))
1988 unsigned HOST_WIDE_INT const_max_nunits;
1989 if (!max_nunits.is_constant (&const_max_nunits)
1990 || const_max_nunits > group_size)
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1994 "Build SLP failed: store group "
1995 "size not a multiple of the vector size "
1996 "in basic block SLP\n");
1997 vect_free_slp_tree (node, false);
1998 return false;
2000 /* Fatal mismatch. */
2001 matches[group_size / const_max_nunits * const_max_nunits] = false;
2002 vect_free_slp_tree (node, false);
2004 else
2006 /* Create a new SLP instance. */
2007 new_instance = XNEW (struct _slp_instance);
2008 SLP_INSTANCE_TREE (new_instance) = node;
2009 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2010 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2011 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2012 vect_gather_slp_loads (new_instance, node);
2014 /* Compute the load permutation. */
2015 slp_tree load_node;
2016 bool loads_permuted = false;
2017 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2019 vec<unsigned> load_permutation;
2020 int j;
2021 stmt_vec_info load_info;
2022 bool this_load_permuted = false;
2023 load_permutation.create (group_size);
2024 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2025 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2026 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2028 int load_place = vect_get_place_in_interleaving_chain
2029 (load_info, first_stmt_info);
2030 gcc_assert (load_place != -1);
2031 if (load_place != j)
2032 this_load_permuted = true;
2033 load_permutation.safe_push (load_place);
2035 if (!this_load_permuted
2036 /* The load requires permutation when unrolling exposes
2037 a gap either because the group is larger than the SLP
2038 group-size or because there is a gap between the groups. */
2039 && (known_eq (unrolling_factor, 1U)
2040 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2041 && DR_GROUP_GAP (first_stmt_info) == 0)))
2043 load_permutation.release ();
2044 continue;
2046 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2047 loads_permuted = true;
2050 if (loads_permuted)
2052 if (!vect_supported_load_permutation_p (new_instance))
2054 if (dump_enabled_p ())
2055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2056 "Build SLP failed: unsupported load "
2057 "permutation %G", stmt_info->stmt);
2058 vect_free_slp_instance (new_instance, false);
2059 return false;
2063 /* If the loads and stores can be handled with load/store-lan
2064 instructions do not generate this SLP instance. */
2065 if (is_a <loop_vec_info> (vinfo)
2066 && loads_permuted
2067 && dr && vect_store_lanes_supported (vectype, group_size, false))
2069 slp_tree load_node;
2070 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2072 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2073 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2074 /* Use SLP for strided accesses (or if we can't load-lanes). */
2075 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2076 || ! vect_load_lanes_supported
2077 (STMT_VINFO_VECTYPE (stmt_vinfo),
2078 DR_GROUP_SIZE (stmt_vinfo), false))
2079 break;
2081 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2083 if (dump_enabled_p ())
2084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2085 "Built SLP cancelled: can use "
2086 "load/store-lanes\n");
2087 vect_free_slp_instance (new_instance, false);
2088 return false;
2092 vinfo->slp_instances.safe_push (new_instance);
2094 if (dump_enabled_p ())
2096 dump_printf_loc (MSG_NOTE, vect_location,
2097 "Final SLP tree for instance:\n");
2098 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2101 return true;
2104 else
2106 /* Failed to SLP. */
2107 /* Free the allocated memory. */
2108 scalar_stmts.release ();
2111 /* For basic block SLP, try to break the group up into multiples of the
2112 vector size. */
2113 unsigned HOST_WIDE_INT const_nunits;
2114 if (is_a <bb_vec_info> (vinfo)
2115 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2116 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2117 && nunits.is_constant (&const_nunits))
2119 /* We consider breaking the group only on VF boundaries from the existing
2120 start. */
2121 for (i = 0; i < group_size; i++)
2122 if (!matches[i]) break;
2124 if (i >= const_nunits && i < group_size)
2126 /* Split into two groups at the first vector boundary before i. */
2127 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2128 unsigned group1_size = i & ~(const_nunits - 1);
2130 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2131 group1_size);
2132 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2133 max_tree_size);
2134 /* If the first non-match was in the middle of a vector,
2135 skip the rest of that vector. */
2136 if (group1_size < i)
2138 i = group1_size + const_nunits;
2139 if (i < group_size)
2140 rest = vect_split_slp_store_group (rest, const_nunits);
2142 if (i < group_size)
2143 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2144 return res;
2146 /* Even though the first vector did not all match, we might be able to SLP
2147 (some) of the remainder. FORNOW ignore this possibility. */
2150 return false;
2154 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2155 trees of packed scalar stmts if SLP is possible. */
2157 opt_result
2158 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2160 unsigned int i;
2161 stmt_vec_info first_element;
2163 DUMP_VECT_SCOPE ("vect_analyze_slp");
2165 /* Find SLP sequences starting from groups of grouped stores. */
2166 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2167 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2169 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2171 if (loop_vinfo->reduction_chains.length () > 0)
2173 /* Find SLP sequences starting from reduction chains. */
2174 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2175 if (! vect_analyze_slp_instance (vinfo, first_element,
2176 max_tree_size))
2178 /* Dissolve reduction chain group. */
2179 stmt_vec_info vinfo = first_element;
2180 while (vinfo)
2182 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2183 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2184 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2185 vinfo = next;
2187 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2191 /* Find SLP sequences starting from groups of reductions. */
2192 if (loop_vinfo->reductions.length () > 1)
2193 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2194 max_tree_size);
2197 return opt_result::success ();
2201 /* For each possible SLP instance decide whether to SLP it and calculate overall
2202 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2203 least one instance. */
2205 bool
2206 vect_make_slp_decision (loop_vec_info loop_vinfo)
2208 unsigned int i;
2209 poly_uint64 unrolling_factor = 1;
2210 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2211 slp_instance instance;
2212 int decided_to_slp = 0;
2214 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2216 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2218 /* FORNOW: SLP if you can. */
2219 /* All unroll factors have the form current_vector_size * X for some
2220 rational X, so they must have a common multiple. */
2221 unrolling_factor
2222 = force_common_multiple (unrolling_factor,
2223 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2225 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2226 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2227 loop-based vectorization. Such stmts will be marked as HYBRID. */
2228 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2229 decided_to_slp++;
2232 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2234 if (decided_to_slp && dump_enabled_p ())
2236 dump_printf_loc (MSG_NOTE, vect_location,
2237 "Decided to SLP %d instances. Unrolling factor ",
2238 decided_to_slp);
2239 dump_dec (MSG_NOTE, unrolling_factor);
2240 dump_printf (MSG_NOTE, "\n");
2243 return (decided_to_slp > 0);
2247 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2248 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2250 static void
2251 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2252 hash_map<slp_tree, unsigned> &visited)
2254 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2255 imm_use_iterator imm_iter;
2256 gimple *use_stmt;
2257 stmt_vec_info use_vinfo;
2258 slp_tree child;
2259 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2260 int j;
2262 /* We need to union stype over the incoming graph edges but we still
2263 want to limit recursion to stay O(N+E). */
2264 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
2266 /* Propagate hybrid down the SLP tree. */
2267 if (stype == hybrid)
2269 else if (HYBRID_SLP_STMT (stmt_vinfo))
2270 stype = hybrid;
2271 else if (!only_edge)
2273 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2274 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2275 /* If we get a pattern stmt here we have to use the LHS of the
2276 original stmt for immediate uses. */
2277 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2278 tree def;
2279 if (gimple_code (stmt) == GIMPLE_PHI)
2280 def = gimple_phi_result (stmt);
2281 else
2282 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2283 if (def)
2284 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2286 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2287 if (!use_vinfo)
2288 continue;
2289 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2290 if (!STMT_SLP_TYPE (use_vinfo)
2291 && (STMT_VINFO_RELEVANT (use_vinfo)
2292 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2293 && !(gimple_code (use_stmt) == GIMPLE_PHI
2294 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2298 "def in non-SLP stmt: %G", use_stmt);
2299 stype = hybrid;
2304 if (stype == hybrid
2305 && !HYBRID_SLP_STMT (stmt_vinfo))
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2309 stmt_vinfo->stmt);
2310 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2313 if (!only_edge)
2314 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2315 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2316 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2319 static void
2320 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2322 hash_map<slp_tree, unsigned> visited;
2323 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
2326 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2328 static tree
2329 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2331 walk_stmt_info *wi = (walk_stmt_info *)data;
2332 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2334 if (wi->is_lhs)
2335 return NULL_TREE;
2337 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2338 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2342 def_stmt_info->stmt);
2343 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2346 return NULL_TREE;
2349 static tree
2350 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2351 walk_stmt_info *wi)
2353 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2354 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2355 /* If the stmt is in a SLP instance then this isn't a reason
2356 to mark use definitions in other SLP instances as hybrid. */
2357 if (! STMT_SLP_TYPE (use_vinfo)
2358 && (STMT_VINFO_RELEVANT (use_vinfo)
2359 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2360 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2361 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2363 else
2364 *handled = true;
2365 return NULL_TREE;
2368 /* Find stmts that must be both vectorized and SLPed. */
2370 void
2371 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2373 unsigned int i;
2374 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2375 slp_instance instance;
2377 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2379 /* First walk all pattern stmt in the loop and mark defs of uses as
2380 hybrid because immediate uses in them are not recorded. */
2381 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2383 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2384 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2385 gsi_next (&gsi))
2387 gimple *stmt = gsi_stmt (gsi);
2388 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2389 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2391 walk_stmt_info wi;
2392 memset (&wi, 0, sizeof (wi));
2393 wi.info = loop_vinfo;
2394 gimple_stmt_iterator gsi2
2395 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2396 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2397 vect_detect_hybrid_slp_1, &wi);
2398 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2399 vect_detect_hybrid_slp_2,
2400 vect_detect_hybrid_slp_1, &wi);
2405 /* Then walk the SLP instance trees marking stmts with uses in
2406 non-SLP stmts as hybrid, also propagating hybrid down the
2407 SLP tree, collecting the above info on-the-fly. */
2408 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2410 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2411 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2412 i, pure_slp);
2417 /* Initialize a bb_vec_info struct for the statements between
2418 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2420 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2421 gimple_stmt_iterator region_end_in,
2422 vec_info_shared *shared)
2423 : vec_info (vec_info::bb, init_cost (NULL), shared),
2424 bb (gsi_bb (region_begin_in)),
2425 region_begin (region_begin_in),
2426 region_end (region_end_in)
2428 gimple_stmt_iterator gsi;
2430 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2431 gsi_next (&gsi))
2433 gimple *stmt = gsi_stmt (gsi);
2434 gimple_set_uid (stmt, 0);
2435 add_stmt (stmt);
2438 bb->aux = this;
2442 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2443 stmts in the basic block. */
2445 _bb_vec_info::~_bb_vec_info ()
2447 for (gimple_stmt_iterator si = region_begin;
2448 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2449 /* Reset region marker. */
2450 gimple_set_uid (gsi_stmt (si), -1);
2452 bb->aux = NULL;
2455 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2456 given then that child nodes have already been processed, and that
2457 their def types currently match their SLP node's def type. */
2459 static bool
2460 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2461 slp_instance node_instance,
2462 stmt_vector_for_cost *cost_vec)
2464 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2465 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2467 /* For BB vectorization vector types are assigned here.
2468 Memory accesses already got their vector type assigned
2469 in vect_analyze_data_refs. */
2470 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2471 if (bb_vinfo
2472 && ! STMT_VINFO_DATA_REF (stmt_info))
2474 tree vectype, nunits_vectype;
2475 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2476 &nunits_vectype))
2477 /* We checked this when building the node. */
2478 gcc_unreachable ();
2479 if (vectype == boolean_type_node)
2481 vectype = vect_get_mask_type_for_stmt (stmt_info);
2482 if (!vectype)
2483 /* vect_get_mask_type_for_stmt has already explained the
2484 failure. */
2485 return false;
2488 stmt_vec_info sstmt_info;
2489 unsigned int i;
2490 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2491 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2494 /* Calculate the number of vector statements to be created for the
2495 scalar stmts in this node. For SLP reductions it is equal to the
2496 number of vector statements in the children (which has already been
2497 calculated by the recursive call). Otherwise it is the number of
2498 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2499 VF divided by the number of elements in a vector. */
2500 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2501 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2502 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2503 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2504 else
2506 poly_uint64 vf;
2507 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2508 vf = loop_vinfo->vectorization_factor;
2509 else
2510 vf = 1;
2511 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2512 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2513 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2514 = vect_get_num_vectors (vf * group_size, vectype);
2517 bool dummy;
2518 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2521 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2522 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2524 Return true if the operations are supported. */
2526 static bool
2527 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2528 slp_instance node_instance,
2529 scalar_stmts_to_slp_tree_map_t *visited,
2530 scalar_stmts_to_slp_tree_map_t *lvisited,
2531 stmt_vector_for_cost *cost_vec)
2533 int i, j;
2534 slp_tree child;
2536 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2537 return true;
2539 /* If we already analyzed the exact same set of scalar stmts we're done.
2540 We share the generated vector stmts for those. */
2541 slp_tree *leader;
2542 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2543 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2545 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2546 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2547 return true;
2550 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2551 doesn't result in any issue since we throw away the lvisited set
2552 when we fail. */
2553 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2555 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2556 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2557 visited, lvisited, cost_vec))
2558 return false;
2560 /* ??? We have to catch the case late where two first scalar stmts appear
2561 in multiple SLP children with different def type and fail. Remember
2562 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2563 match it when that is vect_internal_def. */
2564 auto_vec<vect_def_type, 4> dt;
2565 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2566 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2567 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2569 /* Push SLP node def-type to stmt operands. */
2570 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2571 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2572 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2573 = SLP_TREE_DEF_TYPE (child);
2575 /* Check everything worked out. */
2576 bool res = true;
2577 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2578 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2580 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2581 != SLP_TREE_DEF_TYPE (child))
2582 res = false;
2584 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
2585 res = false;
2586 if (!res && dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "not vectorized: same operand with different "
2589 "def type in stmt.\n");
2591 if (res)
2592 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2593 cost_vec);
2595 /* Restore def-types. */
2596 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2597 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2599 return res;
2603 /* Analyze statements in SLP instances of VINFO. Return true if the
2604 operations are supported. */
2606 bool
2607 vect_slp_analyze_operations (vec_info *vinfo)
2609 slp_instance instance;
2610 int i;
2612 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2614 scalar_stmts_to_slp_tree_map_t *visited
2615 = new scalar_stmts_to_slp_tree_map_t ();
2616 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2618 scalar_stmts_to_slp_tree_map_t lvisited;
2619 stmt_vector_for_cost cost_vec;
2620 cost_vec.create (2);
2621 if (!vect_slp_analyze_node_operations (vinfo,
2622 SLP_INSTANCE_TREE (instance),
2623 instance, visited, &lvisited,
2624 &cost_vec))
2626 slp_tree node = SLP_INSTANCE_TREE (instance);
2627 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2628 if (dump_enabled_p ())
2629 dump_printf_loc (MSG_NOTE, vect_location,
2630 "removing SLP instance operations starting from: %G",
2631 stmt_info->stmt);
2632 vect_free_slp_instance (instance, false);
2633 vinfo->slp_instances.ordered_remove (i);
2634 cost_vec.release ();
2636 else
2638 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2639 x != lvisited.end(); ++x)
2640 visited->put ((*x).first.copy (), (*x).second);
2641 i++;
2643 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2644 cost_vec.release ();
2647 delete visited;
2649 return !vinfo->slp_instances.is_empty ();
2653 /* Compute the scalar cost of the SLP node NODE and its children
2654 and return it. Do not account defs that are marked in LIFE and
2655 update LIFE according to uses of NODE. */
2657 static void
2658 vect_bb_slp_scalar_cost (basic_block bb,
2659 slp_tree node, vec<bool, va_heap> *life,
2660 stmt_vector_for_cost *cost_vec,
2661 hash_set<slp_tree> &visited)
2663 unsigned i;
2664 stmt_vec_info stmt_info;
2665 slp_tree child;
2667 if (visited.add (node))
2668 return;
2670 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2672 gimple *stmt = stmt_info->stmt;
2673 vec_info *vinfo = stmt_info->vinfo;
2674 ssa_op_iter op_iter;
2675 def_operand_p def_p;
2677 if ((*life)[i])
2678 continue;
2680 /* If there is a non-vectorized use of the defs then the scalar
2681 stmt is kept live in which case we do not account it or any
2682 required defs in the SLP children in the scalar cost. This
2683 way we make the vectorization more costly when compared to
2684 the scalar cost. */
2685 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2687 imm_use_iterator use_iter;
2688 gimple *use_stmt;
2689 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2690 if (!is_gimple_debug (use_stmt))
2692 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2693 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2695 (*life)[i] = true;
2696 BREAK_FROM_IMM_USE_STMT (use_iter);
2700 if ((*life)[i])
2701 continue;
2703 /* Count scalar stmts only once. */
2704 if (gimple_visited_p (stmt))
2705 continue;
2706 gimple_set_visited (stmt, true);
2708 vect_cost_for_stmt kind;
2709 if (STMT_VINFO_DATA_REF (stmt_info))
2711 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2712 kind = scalar_load;
2713 else
2714 kind = scalar_store;
2716 else
2717 kind = scalar_stmt;
2718 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2721 auto_vec<bool, 20> subtree_life;
2722 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2724 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2726 /* Do not directly pass LIFE to the recursive call, copy it to
2727 confine changes in the callee to the current child/subtree. */
2728 subtree_life.safe_splice (*life);
2729 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2730 visited);
2731 subtree_life.truncate (0);
2736 static void
2737 vect_bb_slp_scalar_cost (basic_block bb,
2738 slp_tree node, vec<bool, va_heap> *life,
2739 stmt_vector_for_cost *cost_vec)
2741 hash_set<slp_tree> visited;
2742 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2745 /* Check if vectorization of the basic block is profitable. */
2747 static bool
2748 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2750 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2751 slp_instance instance;
2752 int i;
2753 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2754 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2756 /* Calculate scalar cost. */
2757 stmt_vector_for_cost scalar_costs;
2758 scalar_costs.create (0);
2759 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2761 auto_vec<bool, 20> life;
2762 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2763 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2764 SLP_INSTANCE_TREE (instance),
2765 &life, &scalar_costs);
2767 void *target_cost_data = init_cost (NULL);
2768 add_stmt_costs (target_cost_data, &scalar_costs);
2769 scalar_costs.release ();
2770 unsigned dummy;
2771 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2772 destroy_cost_data (target_cost_data);
2774 /* Unset visited flag. */
2775 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2776 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2777 gimple_set_visited (gsi_stmt (gsi), false);
2779 /* Complete the target-specific cost calculation. */
2780 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2781 &vec_inside_cost, &vec_epilogue_cost);
2783 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2785 if (dump_enabled_p ())
2787 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2788 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2789 vec_inside_cost);
2790 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2791 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2792 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2795 /* Vectorization is profitable if its cost is more than the cost of scalar
2796 version. Note that we err on the vector side for equal cost because
2797 the cost estimate is otherwise quite pessimistic (constant uses are
2798 free on the scalar side but cost a load on the vector side for
2799 example). */
2800 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2801 return false;
2803 return true;
2806 /* Check if the basic block can be vectorized. Returns a bb_vec_info
2807 if so and sets fatal to true if failure is independent of
2808 current_vector_size. */
2810 static bb_vec_info
2811 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2812 gimple_stmt_iterator region_end,
2813 vec<data_reference_p> datarefs, int n_stmts,
2814 bool &fatal, vec_info_shared *shared)
2816 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2818 bb_vec_info bb_vinfo;
2819 slp_instance instance;
2820 int i;
2821 poly_uint64 min_vf = 2;
2823 /* The first group of checks is independent of the vector size. */
2824 fatal = true;
2826 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2828 if (dump_enabled_p ())
2829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2830 "not vectorized: too many instructions in "
2831 "basic block.\n");
2832 free_data_refs (datarefs);
2833 return NULL;
2836 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2837 if (!bb_vinfo)
2838 return NULL;
2840 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2841 bb_vinfo->shared->save_datarefs ();
2843 /* Analyze the data references. */
2845 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2847 if (dump_enabled_p ())
2848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2849 "not vectorized: unhandled data-ref in basic "
2850 "block.\n");
2852 delete bb_vinfo;
2853 return NULL;
2856 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2858 if (dump_enabled_p ())
2859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 "not vectorized: not enough data-refs in "
2861 "basic block.\n");
2863 delete bb_vinfo;
2864 return NULL;
2867 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2869 if (dump_enabled_p ())
2870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2871 "not vectorized: unhandled data access in "
2872 "basic block.\n");
2874 delete bb_vinfo;
2875 return NULL;
2878 /* If there are no grouped stores in the region there is no need
2879 to continue with pattern recog as vect_analyze_slp will fail
2880 anyway. */
2881 if (bb_vinfo->grouped_stores.is_empty ())
2883 if (dump_enabled_p ())
2884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2885 "not vectorized: no grouped stores in "
2886 "basic block.\n");
2888 delete bb_vinfo;
2889 return NULL;
2892 /* While the rest of the analysis below depends on it in some way. */
2893 fatal = false;
2895 vect_pattern_recog (bb_vinfo);
2897 /* Check the SLP opportunities in the basic block, analyze and build SLP
2898 trees. */
2899 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2901 if (dump_enabled_p ())
2903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2904 "Failed to SLP the basic block.\n");
2905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2906 "not vectorized: failed to find SLP opportunities "
2907 "in basic block.\n");
2910 delete bb_vinfo;
2911 return NULL;
2914 vect_record_base_alignments (bb_vinfo);
2916 /* Analyze and verify the alignment of data references and the
2917 dependence in the SLP instances. */
2918 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2920 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2921 || ! vect_slp_analyze_instance_dependence (instance))
2923 slp_tree node = SLP_INSTANCE_TREE (instance);
2924 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2925 if (dump_enabled_p ())
2926 dump_printf_loc (MSG_NOTE, vect_location,
2927 "removing SLP instance operations starting from: %G",
2928 stmt_info->stmt);
2929 vect_free_slp_instance (instance, false);
2930 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2931 continue;
2934 /* Mark all the statements that we want to vectorize as pure SLP and
2935 relevant. */
2936 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2937 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2939 i++;
2941 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2943 delete bb_vinfo;
2944 return NULL;
2947 if (!vect_slp_analyze_operations (bb_vinfo))
2949 if (dump_enabled_p ())
2950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2951 "not vectorized: bad operation in basic block.\n");
2953 delete bb_vinfo;
2954 return NULL;
2957 /* Cost model: check if the vectorization is worthwhile. */
2958 if (!unlimited_cost_model (NULL)
2959 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2961 if (dump_enabled_p ())
2962 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2963 "not vectorized: vectorization is not "
2964 "profitable.\n");
2966 delete bb_vinfo;
2967 return NULL;
2970 if (dump_enabled_p ())
2971 dump_printf_loc (MSG_NOTE, vect_location,
2972 "Basic block will be vectorized using SLP\n");
2974 return bb_vinfo;
2978 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
2979 true if anything in the basic-block was vectorized. */
2981 bool
2982 vect_slp_bb (basic_block bb)
2984 bb_vec_info bb_vinfo;
2985 gimple_stmt_iterator gsi;
2986 bool any_vectorized = false;
2987 auto_vector_sizes vector_sizes;
2989 /* Autodetect first vector size we try. */
2990 current_vector_size = 0;
2991 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2992 unsigned int next_size = 0;
2994 gsi = gsi_start_bb (bb);
2996 poly_uint64 autodetected_vector_size = 0;
2997 while (1)
2999 if (gsi_end_p (gsi))
3000 break;
3002 gimple_stmt_iterator region_begin = gsi;
3003 vec<data_reference_p> datarefs = vNULL;
3004 int insns = 0;
3006 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3008 gimple *stmt = gsi_stmt (gsi);
3009 if (is_gimple_debug (stmt))
3010 continue;
3011 insns++;
3013 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3014 vect_location = stmt;
3016 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3017 break;
3020 /* Skip leading unhandled stmts. */
3021 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3023 gsi_next (&gsi);
3024 continue;
3027 gimple_stmt_iterator region_end = gsi;
3029 bool vectorized = false;
3030 bool fatal = false;
3031 vec_info_shared shared;
3032 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3033 datarefs, insns, fatal, &shared);
3034 if (bb_vinfo
3035 && dbg_cnt (vect_slp))
3037 if (dump_enabled_p ())
3038 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3040 bb_vinfo->shared->check_datarefs ();
3041 vect_schedule_slp (bb_vinfo);
3043 unsigned HOST_WIDE_INT bytes;
3044 if (dump_enabled_p ())
3046 if (current_vector_size.is_constant (&bytes))
3047 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3048 "basic block part vectorized using %wu byte "
3049 "vectors\n", bytes);
3050 else
3051 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3052 "basic block part vectorized using variable "
3053 "length vectors\n");
3056 vectorized = true;
3058 delete bb_vinfo;
3060 any_vectorized |= vectorized;
3062 if (next_size == 0)
3063 autodetected_vector_size = current_vector_size;
3065 if (next_size < vector_sizes.length ()
3066 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3067 next_size += 1;
3069 if (vectorized
3070 || next_size == vector_sizes.length ()
3071 || known_eq (current_vector_size, 0U)
3072 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3073 vector sizes will fail do not bother iterating. */
3074 || fatal)
3076 if (gsi_end_p (region_end))
3077 break;
3079 /* Skip the unhandled stmt. */
3080 gsi_next (&gsi);
3082 /* And reset vector sizes. */
3083 current_vector_size = 0;
3084 next_size = 0;
3086 else
3088 /* Try the next biggest vector size. */
3089 current_vector_size = vector_sizes[next_size++];
3090 if (dump_enabled_p ())
3092 dump_printf_loc (MSG_NOTE, vect_location,
3093 "***** Re-trying analysis with "
3094 "vector size ");
3095 dump_dec (MSG_NOTE, current_vector_size);
3096 dump_printf (MSG_NOTE, "\n");
3099 /* Start over. */
3100 gsi = region_begin;
3104 return any_vectorized;
3108 /* Return 1 if vector type of boolean constant which is OPNUM
3109 operand in statement STMT_VINFO is a boolean vector. */
3111 static bool
3112 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
3114 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3115 tree op, vectype;
3116 enum vect_def_type dt;
3118 /* For comparison and COND_EXPR type is chosen depending
3119 on the other comparison operand. */
3120 if (TREE_CODE_CLASS (code) == tcc_comparison)
3122 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3123 if (opnum)
3124 op = gimple_assign_rhs1 (stmt);
3125 else
3126 op = gimple_assign_rhs2 (stmt);
3128 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3129 gcc_unreachable ();
3131 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3134 if (code == COND_EXPR)
3136 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3137 tree cond = gimple_assign_rhs1 (stmt);
3139 if (TREE_CODE (cond) == SSA_NAME)
3140 op = cond;
3141 else if (opnum)
3142 op = TREE_OPERAND (cond, 1);
3143 else
3144 op = TREE_OPERAND (cond, 0);
3146 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3147 gcc_unreachable ();
3149 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3152 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3155 /* Build a variable-length vector in which the elements in ELTS are repeated
3156 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3157 RESULTS and add any new instructions to SEQ.
3159 The approach we use is:
3161 (1) Find a vector mode VM with integer elements of mode IM.
3163 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3164 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3165 from small vectors to IM.
3167 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3169 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3170 correct byte contents.
3172 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3174 We try to find the largest IM for which this sequence works, in order
3175 to cut down on the number of interleaves. */
3177 void
3178 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3179 unsigned int nresults, vec<tree> &results)
3181 unsigned int nelts = elts.length ();
3182 tree element_type = TREE_TYPE (vector_type);
3184 /* (1) Find a vector mode VM with integer elements of mode IM. */
3185 unsigned int nvectors = 1;
3186 tree new_vector_type;
3187 tree permutes[2];
3188 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3189 &nvectors, &new_vector_type,
3190 permutes))
3191 gcc_unreachable ();
3193 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3194 unsigned int partial_nelts = nelts / nvectors;
3195 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3197 tree_vector_builder partial_elts;
3198 auto_vec<tree, 32> pieces (nvectors * 2);
3199 pieces.quick_grow (nvectors * 2);
3200 for (unsigned int i = 0; i < nvectors; ++i)
3202 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3203 ELTS' has mode IM. */
3204 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3205 for (unsigned int j = 0; j < partial_nelts; ++j)
3206 partial_elts.quick_push (elts[i * partial_nelts + j]);
3207 tree t = gimple_build_vector (seq, &partial_elts);
3208 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3209 TREE_TYPE (new_vector_type), t);
3211 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3212 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3215 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3216 correct byte contents.
3218 We need to repeat the following operation log2(nvectors) times:
3220 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3221 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3223 However, if each input repeats every N elements and the VF is
3224 a multiple of N * 2, the HI result is the same as the LO. */
3225 unsigned int in_start = 0;
3226 unsigned int out_start = nvectors;
3227 unsigned int hi_start = nvectors / 2;
3228 /* A bound on the number of outputs needed to produce NRESULTS results
3229 in the final iteration. */
3230 unsigned int noutputs_bound = nvectors * nresults;
3231 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3233 noutputs_bound /= 2;
3234 unsigned int limit = MIN (noutputs_bound, nvectors);
3235 for (unsigned int i = 0; i < limit; ++i)
3237 if ((i & 1) != 0
3238 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3239 2 * in_repeat))
3241 pieces[out_start + i] = pieces[out_start + i - 1];
3242 continue;
3245 tree output = make_ssa_name (new_vector_type);
3246 tree input1 = pieces[in_start + (i / 2)];
3247 tree input2 = pieces[in_start + (i / 2) + hi_start];
3248 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3249 input1, input2,
3250 permutes[i & 1]);
3251 gimple_seq_add_stmt (seq, stmt);
3252 pieces[out_start + i] = output;
3254 std::swap (in_start, out_start);
3257 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3258 results.reserve (nresults);
3259 for (unsigned int i = 0; i < nresults; ++i)
3260 if (i < nvectors)
3261 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3262 pieces[in_start + i]));
3263 else
3264 results.quick_push (results[i - nvectors]);
3268 /* For constant and loop invariant defs of SLP_NODE this function returns
3269 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3270 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3271 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3272 REDUC_INDEX is the index of the reduction operand in the statements, unless
3273 it is -1. */
3275 static void
3276 vect_get_constant_vectors (tree op, slp_tree slp_node,
3277 vec<tree> *vec_oprnds,
3278 unsigned int op_num, unsigned int number_of_vectors)
3280 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3281 stmt_vec_info stmt_vinfo = stmts[0];
3282 gimple *stmt = stmt_vinfo->stmt;
3283 unsigned HOST_WIDE_INT nunits;
3284 tree vec_cst;
3285 unsigned j, number_of_places_left_in_vector;
3286 tree vector_type;
3287 tree vop;
3288 int group_size = stmts.length ();
3289 unsigned int vec_num, i;
3290 unsigned number_of_copies = 1;
3291 vec<tree> voprnds;
3292 voprnds.create (number_of_vectors);
3293 bool constant_p, is_store;
3294 tree neutral_op = NULL;
3295 enum tree_code code = gimple_expr_code (stmt);
3296 gimple_seq ctor_seq = NULL;
3297 auto_vec<tree, 16> permute_results;
3299 /* Check if vector type is a boolean vector. */
3300 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3301 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3302 vector_type
3303 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3304 else
3305 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3307 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3309 is_store = true;
3310 op = gimple_assign_rhs1 (stmt);
3312 else
3313 is_store = false;
3315 gcc_assert (op);
3317 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3318 created vectors. It is greater than 1 if unrolling is performed.
3320 For example, we have two scalar operands, s1 and s2 (e.g., group of
3321 strided accesses of size two), while NUNITS is four (i.e., four scalars
3322 of this type can be packed in a vector). The output vector will contain
3323 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3324 will be 2).
3326 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3327 containing the operands.
3329 For example, NUNITS is four as before, and the group size is 8
3330 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3331 {s5, s6, s7, s8}. */
3333 /* When using duplicate_and_interleave, we just need one element for
3334 each scalar statement. */
3335 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3336 nunits = group_size;
3338 number_of_copies = nunits * number_of_vectors / group_size;
3340 number_of_places_left_in_vector = nunits;
3341 constant_p = true;
3342 tree_vector_builder elts (vector_type, nunits, 1);
3343 elts.quick_grow (nunits);
3344 bool place_after_defs = false;
3345 for (j = 0; j < number_of_copies; j++)
3347 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3349 stmt = stmt_vinfo->stmt;
3350 if (is_store)
3351 op = gimple_assign_rhs1 (stmt);
3352 else
3354 switch (code)
3356 case COND_EXPR:
3358 tree cond = gimple_assign_rhs1 (stmt);
3359 if (TREE_CODE (cond) == SSA_NAME)
3360 op = gimple_op (stmt, op_num + 1);
3361 else if (op_num == 0 || op_num == 1)
3362 op = TREE_OPERAND (cond, op_num);
3363 else
3365 if (op_num == 2)
3366 op = gimple_assign_rhs2 (stmt);
3367 else
3368 op = gimple_assign_rhs3 (stmt);
3371 break;
3373 case CALL_EXPR:
3374 op = gimple_call_arg (stmt, op_num);
3375 break;
3377 case LSHIFT_EXPR:
3378 case RSHIFT_EXPR:
3379 case LROTATE_EXPR:
3380 case RROTATE_EXPR:
3381 op = gimple_op (stmt, op_num + 1);
3382 /* Unlike the other binary operators, shifts/rotates have
3383 the shift count being int, instead of the same type as
3384 the lhs, so make sure the scalar is the right type if
3385 we are dealing with vectors of
3386 long long/long/short/char. */
3387 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3388 op = fold_convert (TREE_TYPE (vector_type), op);
3389 break;
3391 default:
3392 op = gimple_op (stmt, op_num + 1);
3393 break;
3397 /* Create 'vect_ = {op0,op1,...,opn}'. */
3398 number_of_places_left_in_vector--;
3399 tree orig_op = op;
3400 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3402 if (CONSTANT_CLASS_P (op))
3404 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3406 /* Can't use VIEW_CONVERT_EXPR for booleans because
3407 of possibly different sizes of scalar value and
3408 vector element. */
3409 if (integer_zerop (op))
3410 op = build_int_cst (TREE_TYPE (vector_type), 0);
3411 else if (integer_onep (op))
3412 op = build_all_ones_cst (TREE_TYPE (vector_type));
3413 else
3414 gcc_unreachable ();
3416 else
3417 op = fold_unary (VIEW_CONVERT_EXPR,
3418 TREE_TYPE (vector_type), op);
3419 gcc_assert (op && CONSTANT_CLASS_P (op));
3421 else
3423 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3424 gimple *init_stmt;
3425 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3427 tree true_val
3428 = build_all_ones_cst (TREE_TYPE (vector_type));
3429 tree false_val
3430 = build_zero_cst (TREE_TYPE (vector_type));
3431 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3432 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3433 op, true_val,
3434 false_val);
3436 else
3438 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3439 op);
3440 init_stmt
3441 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3442 op);
3444 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3445 op = new_temp;
3448 elts[number_of_places_left_in_vector] = op;
3449 if (!CONSTANT_CLASS_P (op))
3450 constant_p = false;
3451 if (TREE_CODE (orig_op) == SSA_NAME
3452 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3453 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3454 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3455 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3456 place_after_defs = true;
3458 if (number_of_places_left_in_vector == 0)
3460 if (constant_p
3461 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3462 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3463 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3464 else
3466 if (vec_oprnds->is_empty ())
3467 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3468 number_of_vectors,
3469 permute_results);
3470 vec_cst = permute_results[number_of_vectors - j - 1];
3472 tree init;
3473 gimple_stmt_iterator gsi;
3474 if (place_after_defs)
3476 stmt_vec_info last_stmt_info
3477 = vect_find_last_scalar_stmt_in_slp (slp_node);
3478 gsi = gsi_for_stmt (last_stmt_info->stmt);
3479 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3480 &gsi);
3482 else
3483 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3484 NULL);
3485 if (ctor_seq != NULL)
3487 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3488 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3489 ctor_seq = NULL;
3491 voprnds.quick_push (init);
3492 place_after_defs = false;
3493 number_of_places_left_in_vector = nunits;
3494 constant_p = true;
3495 elts.new_vector (vector_type, nunits, 1);
3496 elts.quick_grow (nunits);
3501 /* Since the vectors are created in the reverse order, we should invert
3502 them. */
3503 vec_num = voprnds.length ();
3504 for (j = vec_num; j != 0; j--)
3506 vop = voprnds[j - 1];
3507 vec_oprnds->quick_push (vop);
3510 voprnds.release ();
3512 /* In case that VF is greater than the unrolling factor needed for the SLP
3513 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3514 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3515 to replicate the vectors. */
3516 while (number_of_vectors > vec_oprnds->length ())
3518 tree neutral_vec = NULL;
3520 if (neutral_op)
3522 if (!neutral_vec)
3523 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3525 vec_oprnds->quick_push (neutral_vec);
3527 else
3529 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3530 vec_oprnds->quick_push (vop);
3536 /* Get vectorized definitions from SLP_NODE that contains corresponding
3537 vectorized def-stmts. */
3539 static void
3540 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3542 tree vec_oprnd;
3543 stmt_vec_info vec_def_stmt_info;
3544 unsigned int i;
3546 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3548 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3550 gcc_assert (vec_def_stmt_info);
3551 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3552 vec_oprnd = gimple_phi_result (vec_def_phi);
3553 else
3554 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3555 vec_oprnds->quick_push (vec_oprnd);
3560 /* Get vectorized definitions for SLP_NODE.
3561 If the scalar definitions are loop invariants or constants, collect them and
3562 call vect_get_constant_vectors() to create vector stmts.
3563 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3564 must be stored in the corresponding child of SLP_NODE, and we call
3565 vect_get_slp_vect_defs () to retrieve them. */
3567 void
3568 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3569 vec<vec<tree> > *vec_oprnds)
3571 int number_of_vects = 0, i;
3572 unsigned int child_index = 0;
3573 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3574 slp_tree child = NULL;
3575 vec<tree> vec_defs;
3576 tree oprnd;
3577 bool vectorized_defs;
3579 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3580 FOR_EACH_VEC_ELT (ops, i, oprnd)
3582 /* For each operand we check if it has vectorized definitions in a child
3583 node or we need to create them (for invariants and constants). We
3584 check if the LHS of the first stmt of the next child matches OPRND.
3585 If it does, we found the correct child. Otherwise, we call
3586 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3587 to check this child node for the next operand. */
3588 vectorized_defs = false;
3589 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3591 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3593 /* We have to check both pattern and original def, if available. */
3594 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3596 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3597 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3598 tree first_def_op;
3600 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3601 first_def_op = gimple_phi_result (first_def);
3602 else
3603 first_def_op = gimple_get_lhs (first_def_info->stmt);
3604 if (operand_equal_p (oprnd, first_def_op, 0)
3605 || (related
3606 && operand_equal_p (oprnd,
3607 gimple_get_lhs (related->stmt), 0)))
3609 /* The number of vector defs is determined by the number of
3610 vector statements in the node from which we get those
3611 statements. */
3612 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3613 vectorized_defs = true;
3614 child_index++;
3617 else
3618 child_index++;
3621 if (!vectorized_defs)
3623 if (i == 0)
3625 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3626 /* Number of vector stmts was calculated according to LHS in
3627 vect_schedule_slp_instance (), fix it by replacing LHS with
3628 RHS, if necessary. See vect_get_smallest_scalar_type () for
3629 details. */
3630 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3631 &rhs_size_unit);
3632 if (rhs_size_unit != lhs_size_unit)
3634 number_of_vects *= rhs_size_unit;
3635 number_of_vects /= lhs_size_unit;
3640 /* Allocate memory for vectorized defs. */
3641 vec_defs = vNULL;
3642 vec_defs.create (number_of_vects);
3644 /* For reduction defs we call vect_get_constant_vectors (), since we are
3645 looking for initial loop invariant values. */
3646 if (vectorized_defs)
3647 /* The defs are already vectorized. */
3648 vect_get_slp_vect_defs (child, &vec_defs);
3649 else
3650 /* Build vectors from scalar defs. */
3651 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3652 number_of_vects);
3654 vec_oprnds->quick_push (vec_defs);
3658 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3659 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3660 permute statements for the SLP node NODE of the SLP instance
3661 SLP_NODE_INSTANCE. */
3663 bool
3664 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3665 gimple_stmt_iterator *gsi, poly_uint64 vf,
3666 slp_instance slp_node_instance, bool analyze_only,
3667 unsigned *n_perms)
3669 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3670 vec_info *vinfo = stmt_info->vinfo;
3671 int vec_index = 0;
3672 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3673 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3674 unsigned int mask_element;
3675 machine_mode mode;
3677 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3678 return false;
3680 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3682 mode = TYPE_MODE (vectype);
3683 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3685 /* Initialize the vect stmts of NODE to properly insert the generated
3686 stmts later. */
3687 if (! analyze_only)
3688 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3689 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3690 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3692 /* Generate permutation masks for every NODE. Number of masks for each NODE
3693 is equal to GROUP_SIZE.
3694 E.g., we have a group of three nodes with three loads from the same
3695 location in each node, and the vector size is 4. I.e., we have a
3696 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3697 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3698 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3701 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3702 The last mask is illegal since we assume two operands for permute
3703 operation, and the mask element values can't be outside that range.
3704 Hence, the last mask must be converted into {2,5,5,5}.
3705 For the first two permutations we need the first and the second input
3706 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3707 we need the second and the third vectors: {b1,c1,a2,b2} and
3708 {c2,a3,b3,c3}. */
3710 int vect_stmts_counter = 0;
3711 unsigned int index = 0;
3712 int first_vec_index = -1;
3713 int second_vec_index = -1;
3714 bool noop_p = true;
3715 *n_perms = 0;
3717 vec_perm_builder mask;
3718 unsigned int nelts_to_build;
3719 unsigned int nvectors_per_build;
3720 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3721 && multiple_p (nunits, group_size));
3722 if (repeating_p)
3724 /* A single vector contains a whole number of copies of the node, so:
3725 (a) all permutes can use the same mask; and
3726 (b) the permutes only need a single vector input. */
3727 mask.new_vector (nunits, group_size, 3);
3728 nelts_to_build = mask.encoded_nelts ();
3729 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3731 else
3733 /* We need to construct a separate mask for each vector statement. */
3734 unsigned HOST_WIDE_INT const_nunits, const_vf;
3735 if (!nunits.is_constant (&const_nunits)
3736 || !vf.is_constant (&const_vf))
3737 return false;
3738 mask.new_vector (const_nunits, const_nunits, 1);
3739 nelts_to_build = const_vf * group_size;
3740 nvectors_per_build = 1;
3743 unsigned int count = mask.encoded_nelts ();
3744 mask.quick_grow (count);
3745 vec_perm_indices indices;
3747 for (unsigned int j = 0; j < nelts_to_build; j++)
3749 unsigned int iter_num = j / group_size;
3750 unsigned int stmt_num = j % group_size;
3751 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3752 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3753 if (repeating_p)
3755 first_vec_index = 0;
3756 mask_element = i;
3758 else
3760 /* Enforced before the loop when !repeating_p. */
3761 unsigned int const_nunits = nunits.to_constant ();
3762 vec_index = i / const_nunits;
3763 mask_element = i % const_nunits;
3764 if (vec_index == first_vec_index
3765 || first_vec_index == -1)
3767 first_vec_index = vec_index;
3769 else if (vec_index == second_vec_index
3770 || second_vec_index == -1)
3772 second_vec_index = vec_index;
3773 mask_element += const_nunits;
3775 else
3777 if (dump_enabled_p ())
3778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3779 "permutation requires at "
3780 "least three vectors %G",
3781 stmt_info->stmt);
3782 gcc_assert (analyze_only);
3783 return false;
3786 gcc_assert (mask_element < 2 * const_nunits);
3789 if (mask_element != index)
3790 noop_p = false;
3791 mask[index++] = mask_element;
3793 if (index == count && !noop_p)
3795 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3796 if (!can_vec_perm_const_p (mode, indices))
3798 if (dump_enabled_p ())
3800 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3801 vect_location,
3802 "unsupported vect permute { ");
3803 for (i = 0; i < count; ++i)
3805 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3806 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3808 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3810 gcc_assert (analyze_only);
3811 return false;
3814 ++*n_perms;
3817 if (index == count)
3819 if (!analyze_only)
3821 tree mask_vec = NULL_TREE;
3823 if (! noop_p)
3824 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3826 if (second_vec_index == -1)
3827 second_vec_index = first_vec_index;
3829 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3831 /* Generate the permute statement if necessary. */
3832 tree first_vec = dr_chain[first_vec_index + ri];
3833 tree second_vec = dr_chain[second_vec_index + ri];
3834 stmt_vec_info perm_stmt_info;
3835 if (! noop_p)
3837 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3838 tree perm_dest
3839 = vect_create_destination_var (gimple_assign_lhs (stmt),
3840 vectype);
3841 perm_dest = make_ssa_name (perm_dest);
3842 gassign *perm_stmt
3843 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3844 first_vec, second_vec,
3845 mask_vec);
3846 perm_stmt_info
3847 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3848 gsi);
3850 else
3851 /* If mask was NULL_TREE generate the requested
3852 identity transform. */
3853 perm_stmt_info = vinfo->lookup_def (first_vec);
3855 /* Store the vector statement in NODE. */
3856 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3857 = perm_stmt_info;
3861 index = 0;
3862 first_vec_index = -1;
3863 second_vec_index = -1;
3864 noop_p = true;
3868 return true;
3871 /* Vectorize SLP instance tree in postorder. */
3873 static void
3874 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3875 scalar_stmts_to_slp_tree_map_t *bst_map)
3877 gimple_stmt_iterator si;
3878 stmt_vec_info stmt_info;
3879 unsigned int group_size;
3880 tree vectype;
3881 int i, j;
3882 slp_tree child;
3884 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3885 return;
3887 /* See if we have already vectorized the node in the graph of the
3888 SLP instance. */
3889 if (SLP_TREE_VEC_STMTS (node).exists ())
3890 return;
3892 /* See if we have already vectorized the same set of stmts and reuse their
3893 vectorized stmts across instances. */
3894 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3896 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3897 return;
3900 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3901 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3902 vect_schedule_slp_instance (child, instance, bst_map);
3904 /* Push SLP node def-type to stmts. */
3905 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3906 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3908 stmt_vec_info child_stmt_info;
3909 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3910 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3913 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3915 /* VECTYPE is the type of the destination. */
3916 vectype = STMT_VINFO_VECTYPE (stmt_info);
3917 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3918 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3920 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3921 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3923 if (dump_enabled_p ())
3924 dump_printf_loc (MSG_NOTE, vect_location,
3925 "------>vectorizing SLP node starting from: %G",
3926 stmt_info->stmt);
3928 /* Vectorized stmts go before the last scalar stmt which is where
3929 all uses are ready. */
3930 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3931 si = gsi_for_stmt (last_stmt_info->stmt);
3933 /* Mark the first element of the reduction chain as reduction to properly
3934 transform the node. In the analysis phase only the last element of the
3935 chain is marked as reduction. */
3936 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3937 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3938 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3940 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3941 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3944 /* Handle two-operation SLP nodes by vectorizing the group with
3945 both operations and then performing a merge. */
3946 if (SLP_TREE_TWO_OPERATORS (node))
3948 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3949 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3950 enum tree_code ocode = ERROR_MARK;
3951 stmt_vec_info ostmt_info;
3952 vec_perm_builder mask (group_size, group_size, 1);
3953 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3955 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3956 if (gimple_assign_rhs_code (ostmt) != code0)
3958 mask.quick_push (1);
3959 ocode = gimple_assign_rhs_code (ostmt);
3961 else
3962 mask.quick_push (0);
3964 if (ocode != ERROR_MARK)
3966 vec<stmt_vec_info> v0;
3967 vec<stmt_vec_info> v1;
3968 unsigned j;
3969 tree tmask = NULL_TREE;
3970 vect_transform_stmt (stmt_info, &si, node, instance);
3971 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3972 SLP_TREE_VEC_STMTS (node).truncate (0);
3973 gimple_assign_set_rhs_code (stmt, ocode);
3974 vect_transform_stmt (stmt_info, &si, node, instance);
3975 gimple_assign_set_rhs_code (stmt, code0);
3976 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3977 SLP_TREE_VEC_STMTS (node).truncate (0);
3978 tree meltype = build_nonstandard_integer_type
3979 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3980 tree mvectype = get_same_sized_vectype (meltype, vectype);
3981 unsigned k = 0, l;
3982 for (j = 0; j < v0.length (); ++j)
3984 /* Enforced by vect_build_slp_tree, which rejects variable-length
3985 vectors for SLP_TREE_TWO_OPERATORS. */
3986 unsigned int const_nunits = nunits.to_constant ();
3987 tree_vector_builder melts (mvectype, const_nunits, 1);
3988 for (l = 0; l < const_nunits; ++l)
3990 if (k >= group_size)
3991 k = 0;
3992 tree t = build_int_cst (meltype,
3993 mask[k++] * const_nunits + l);
3994 melts.quick_push (t);
3996 tmask = melts.build ();
3998 /* ??? Not all targets support a VEC_PERM_EXPR with a
3999 constant mask that would translate to a vec_merge RTX
4000 (with their vec_perm_const_ok). We can either not
4001 vectorize in that case or let veclower do its job.
4002 Unfortunately that isn't too great and at least for
4003 plus/minus we'd eventually like to match targets
4004 vector addsub instructions. */
4005 gimple *vstmt;
4006 vstmt = gimple_build_assign (make_ssa_name (vectype),
4007 VEC_PERM_EXPR,
4008 gimple_assign_lhs (v0[j]->stmt),
4009 gimple_assign_lhs (v1[j]->stmt),
4010 tmask);
4011 SLP_TREE_VEC_STMTS (node).quick_push
4012 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
4014 v0.release ();
4015 v1.release ();
4016 return;
4019 vect_transform_stmt (stmt_info, &si, node, instance);
4021 /* Restore stmt def-types. */
4022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4023 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4025 stmt_vec_info child_stmt_info;
4026 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4027 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4031 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4032 For loop vectorization this is done in vectorizable_call, but for SLP
4033 it needs to be deferred until end of vect_schedule_slp, because multiple
4034 SLP instances may refer to the same scalar stmt. */
4036 static void
4037 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4039 gimple *new_stmt;
4040 gimple_stmt_iterator gsi;
4041 int i;
4042 slp_tree child;
4043 tree lhs;
4044 stmt_vec_info stmt_info;
4046 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4047 return;
4049 if (visited.add (node))
4050 return;
4052 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4053 vect_remove_slp_scalar_calls (child, visited);
4055 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4057 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4058 if (!stmt || gimple_bb (stmt) == NULL)
4059 continue;
4060 if (is_pattern_stmt_p (stmt_info)
4061 || !PURE_SLP_STMT (stmt_info))
4062 continue;
4063 lhs = gimple_call_lhs (stmt);
4064 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4065 gsi = gsi_for_stmt (stmt);
4066 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4067 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4071 static void
4072 vect_remove_slp_scalar_calls (slp_tree node)
4074 hash_set<slp_tree> visited;
4075 vect_remove_slp_scalar_calls (node, visited);
4078 /* Generate vector code for all SLP instances in the loop/basic block. */
4080 void
4081 vect_schedule_slp (vec_info *vinfo)
4083 vec<slp_instance> slp_instances;
4084 slp_instance instance;
4085 unsigned int i;
4087 scalar_stmts_to_slp_tree_map_t *bst_map
4088 = new scalar_stmts_to_slp_tree_map_t ();
4089 slp_instances = vinfo->slp_instances;
4090 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4092 /* Schedule the tree of INSTANCE. */
4093 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
4094 instance, bst_map);
4095 if (dump_enabled_p ())
4096 dump_printf_loc (MSG_NOTE, vect_location,
4097 "vectorizing stmts using SLP.\n");
4099 delete bst_map;
4101 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4103 slp_tree root = SLP_INSTANCE_TREE (instance);
4104 stmt_vec_info store_info;
4105 unsigned int j;
4107 /* Remove scalar call stmts. Do not do this for basic-block
4108 vectorization as not all uses may be vectorized.
4109 ??? Why should this be necessary? DCE should be able to
4110 remove the stmts itself.
4111 ??? For BB vectorization we can as well remove scalar
4112 stmts starting from the SLP tree root if they have no
4113 uses. */
4114 if (is_a <loop_vec_info> (vinfo))
4115 vect_remove_slp_scalar_calls (root);
4117 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4118 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4120 if (!STMT_VINFO_DATA_REF (store_info))
4121 break;
4123 store_info = vect_orig_stmt (store_info);
4124 /* Free the attached stmt_vec_info and remove the stmt. */
4125 vinfo->remove_stmt (store_info);