gcc/tree-vect-slp.c

/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  VEC_free (stmt_info_for_cost, heap, SLP_INSTANCE_BODY_COST_VEC (instance));
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node;
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);

  return node;
}

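/* Hypothetical examples of the NOPS computation above: a call
   "x = foo (a, b)" yields NOPS == 2.  A plain assignment "x = a + b" has
   gimple_num_ops == 3 (lhs, rhs1, rhs2), so NOPS == 2 as well.  For
   "x = a < b ? c : d" one extra slot is reserved, because the embedded
   comparison "a < b" contributes two operands of its own.  */
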
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
    {
      VEC_free (gimple, heap, oprnd_info->def_stmts);
      XDELETE (oprnd_info);
    }

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *body_cost_vec)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt: ");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node, prologue_cost_vec,
                                       body_cost_vec);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          prologue_cost_vec, body_cost_vec);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "Swapping operands of ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}

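/* Hypothetical example of the operand swapping above: in the group

     a[0] = b[0] + s;
     a[1] = s + b[1];

   rhs1 of the first statement is an internal def (the load of b[0]) while
   rhs1 of the second is the invariant S, so the per-operand definition
   types do not match.  Since PLUS_EXPR is commutative, rhs1 and rhs2 of
   the second statement are swapped, after which the group is uniform and
   can still be SLPed.  */
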
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted,
                     stmt_vector_for_cost *prologue_cost_vec,
                     stmt_vector_for_cost *body_cost_vec)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar
                     shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = VEC_index (gimple, stmts, 0);
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info,
                                                prologue_cost_vec,
                                                body_cost_vec))
                {
                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt))
                     > ncopies * group_size)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node,
                                        prologue_cost_vec, body_cost_vec);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info, prologue_cost_vec,
                                            body_cost_vec))
            {
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          gimple first_stmt = VEC_index (gimple, stmts, 0);
          *loads_permuted = true;
          (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                   vinfo_for_stmt (first_stmt), 0, vect_body);
        }
      else
        {
          /* We don't check here complex numbers chains, so we set
             LOADS_PERMUTED for further check in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (&oprnds_info);
      return true;
    }

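  /* Hypothetical example: if the scalar stmts of a load node access
     b[4*i+2], b[4*i+3], b[4*i+0], b[4*i+1] in that order, the entries
     pushed onto LOAD_PERMUTATION are 2, 3, 0, 1.  Because the entry at
     position I differs from I for at least one load, PERMUTATION is set
     and a vec_perm cost is recorded above.  */
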
  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted,
                                   prologue_cost_vec, body_cost_vec))
        {
          if (child)
            oprnd_info->def_stmts = NULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      oprnd_info->def_stmts = NULL;
      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info);
  return true;
}

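/* Sketch of what the recursion builds for a hypothetical group

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   the root node holds the two stores; its single child holds the two
   PLUS_EXPR statements; the loads from B and from C form two leaf nodes
   that stop the recursion and are collected in LOADS.  */
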
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, (slp_tree) child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data "
                               "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                scalar_stmt, 0);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

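/* Hypothetical example: with GROUP_SIZE 2 and two load nodes whose first
   permutation entries are 1 and 0 respectively, the loop above stores the
   first node at TMP_LOADS[1] and the second at TMP_LOADS[0], so
   SLP_INSTANCE_LOADS ends up ordered by position in the interleaving
   chain.  */
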
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

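/* Hypothetical example: for PERMUTATION {1, 2, 0} and scalar stmts
   {s0, s1, s2}, statement I is placed at position PERMUTATION[I], giving
   {s2, s0, s1}.  */
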
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        dump_printf (MSG_NOTE, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                           SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                               SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is OK, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and
             there are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          bitmap_clear (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (bitmap_bit_p (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              bitmap_set_bit (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!bitmap_bit_p (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment
     complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "unsupported unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            first_load, 0);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (bitmap_bit_p (load_index, prev))
        {
          supported = false;
          break;
        }

      bitmap_set_bit (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!bitmap_bit_p (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

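/* Hypothetical example: for GROUP_SIZE 2 the permutation must have
   2 * 2 == 4 entries; {0, 0, 1, 1} is accepted (each run of GROUP_SIZE
   entries repeats a single index, and the runs together cover 0 and 1),
   while {0, 1, 0, 1} fails the NEXT != PREV check above.  */
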
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  stmt_info_for_cost *si;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

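  /* Worked example of the computation above (hypothetical values): with
     NUNITS == 4 and GROUP_SIZE == 6, least_common_multiple (4, 6) == 12
     and UNROLLING_FACTOR == 12 / 6 == 2, i.e. two copies of the group are
     needed to fill whole vectors.  A basic block offers no loop to unroll,
     hence the rejection just above when the factor is not 1.  */
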
  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            VEC_safe_push (gimple, heap, scalar_stmts,
                           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);
  prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 10);
  body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 10);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted,
                           &prologue_cost_vec, &body_cost_vec))
    {
      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                    : BB_VINFO_TARGET_COST_DATA (bb_vinfo));

      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          VEC_free (stmt_info_for_cost, heap, body_cost_vec);
          VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
          VEC_free (int, heap, load_permutation);
          VEC_free (slp_tree, heap, loads);
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              vect_free_slp_instance (new_instance);
              VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      /* Record the prologue costs, which were delayed until we were
         sure that SLP was successful.  Unlike the body costs, we know
         the final values now regardless of the loop vectorization factor.  */
      FOR_EACH_VEC_ELT (stmt_info_for_cost, prologue_cost_vec, i, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_prologue);
        }

      VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }
  else
    {
      VEC_free (stmt_info_for_cost, heap, body_cost_vec);
      VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

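/* End-to-end sketch (hypothetical input): four grouped stores
   a[0] = b[0] + 1; ... a[3] = b[3] + 1; with V4SI vectors give
   GROUP_SIZE == 4, NUNITS == 4 and UNROLLING_FACTOR == 1;
   vect_build_slp_tree packs the stores, the additions and the loads into
   one tree, and the resulting instance is recorded above.  */
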
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *grouped_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (gimple, grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if a reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate
   overall unrolling factor needed to SLP the loop.  Return TRUE if decided
   to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances
    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as
         HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_void_p child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}

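/* Hypothetical example: if the SSA result of an SLP'ed statement is also
   used by a relevant statement outside the SLP tree (one that the loop
   vectorizer will handle), the defining statement must be vectorized both
   as part of the SLP tree and in the loop-based scheme; marking it HYBRID
   above makes both code paths generate it.  */
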
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances
    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo));
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_free_slp_instance (instance);
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
      return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if
   the operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

2002 /* Check if vectorization of the basic block is profitable. */
2004 static bool
2005 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2007 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2008 slp_instance instance;
2009 int i, j;
2010 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2011 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2012 unsigned int stmt_cost;
2013 gimple stmt;
2014 gimple_stmt_iterator si;
2015 basic_block bb = BB_VINFO_BB (bb_vinfo);
2016 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2017 stmt_vec_info stmt_info = NULL;
2018 stmt_vector_for_cost body_cost_vec;
2019 stmt_info_for_cost *ci;
2021 /* Calculate vector costs. */
2022 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2024 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2026 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, j, ci)
2028 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2029 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2030 stmt_info, ci->misalign, vect_body);
2034 /* Calculate scalar cost. */
2035 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2037 stmt = gsi_stmt (si);
2038 stmt_info = vinfo_for_stmt (stmt);
2040 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2041 || !PURE_SLP_STMT (stmt_info))
2042 continue;
2044 if (STMT_VINFO_DATA_REF (stmt_info))
2046 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2047 stmt_cost = vect_get_stmt_cost (scalar_load);
2048 else
2049 stmt_cost = vect_get_stmt_cost (scalar_store);
2051 else
2052 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2054 scalar_cost += stmt_cost;
2057 /* Complete the target-specific cost calculation. */
2058 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2059 &vec_inside_cost, &vec_epilogue_cost);
2061 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
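2062 /* E.g. (purely illustrative numbers): prologue 2 + epilogue 1 + inside 4 = 7 beats a scalar cost of 8, so the block would be vectorized. */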
2063 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2066 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2067 vec_inside_cost);
2068 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2069 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2070 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d", scalar_cost);
2073 /* Vectorization is profitable if its cost is less than the cost of the
2074 scalar version. */
2075 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2076 return false;
2078 return true;
2081 /* Check if the basic block can be vectorized. */
2083 static bb_vec_info
2084 vect_slp_analyze_bb_1 (basic_block bb)
2086 bb_vec_info bb_vinfo;
2087 VEC (ddr_p, heap) *ddrs;
2088 VEC (slp_instance, heap) *slp_instances;
2089 slp_instance instance;
2090 int i;
2091 int min_vf = 2;
2092 int max_vf = MAX_VECTORIZATION_FACTOR;
2094 bb_vinfo = new_bb_vec_info (bb);
2095 if (!bb_vinfo)
2096 return NULL;
2098 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2100 if (dump_enabled_p ())
2101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2102 "not vectorized: unhandled data-ref in basic "
2103 "block.\n");
2105 destroy_bb_vec_info (bb_vinfo);
2106 return NULL;
2109 ddrs = BB_VINFO_DDRS (bb_vinfo);
2110 if (!VEC_length (ddr_p, ddrs))
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2114 "not vectorized: not enough data-refs in "
2115 "basic block.\n");
2117 destroy_bb_vec_info (bb_vinfo);
2118 return NULL;
2121 vect_pattern_recog (NULL, bb_vinfo);
2123 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
2124 || min_vf > max_vf)
2126 if (dump_enabled_p ())
2127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2128 "not vectorized: unhandled data dependence "
2129 "in basic block.\n");
2131 destroy_bb_vec_info (bb_vinfo);
2132 return NULL;
2135 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2137 if (dump_enabled_p ())
2138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2139 "not vectorized: bad data alignment in basic "
2140 "block.\n");
2142 destroy_bb_vec_info (bb_vinfo);
2143 return NULL;
2146 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150 "not vectorized: unhandled data access in "
2151 "basic block.\n");
2153 destroy_bb_vec_info (bb_vinfo);
2154 return NULL;
2157 /* Check the SLP opportunities in the basic block, analyze and build SLP
2158 trees. */
2159 if (!vect_analyze_slp (NULL, bb_vinfo))
2161 if (dump_enabled_p ())
2162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2163 "not vectorized: failed to find SLP opportunities "
2164 "in basic block.\n");
2166 destroy_bb_vec_info (bb_vinfo);
2167 return NULL;
2170 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2172 /* Mark all the statements that we want to vectorize as pure SLP and
2173 relevant. */
2174 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2176 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2177 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2180 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184 "not vectorized: unsupported alignment in basic "
2185 "block.\n");
2186 destroy_bb_vec_info (bb_vinfo);
2187 return NULL;
2190 if (!vect_slp_analyze_operations (bb_vinfo))
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2194 "not vectorized: bad operation in basic block.\n");
2196 destroy_bb_vec_info (bb_vinfo);
2197 return NULL;
2200 /* Cost model: check if the vectorization is worthwhile. */
2201 if (flag_vect_cost_model
2202 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2204 if (dump_enabled_p ())
2205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2206 "not vectorized: vectorization is not "
2207 "profitable.\n");
2209 destroy_bb_vec_info (bb_vinfo);
2210 return NULL;
2213 if (dump_enabled_p ())
2214 dump_printf_loc (MSG_NOTE, vect_location,
2215 "Basic block will be vectorized using SLP\n");
2217 return bb_vinfo;
2221 bb_vec_info
2222 vect_slp_analyze_bb (basic_block bb)
2224 bb_vec_info bb_vinfo;
2225 int insns = 0;
2226 gimple_stmt_iterator gsi;
2227 unsigned int vector_sizes;
2229 if (dump_enabled_p ())
2230 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2232 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2234 gimple stmt = gsi_stmt (gsi);
2235 if (!is_gimple_debug (stmt)
2236 && !gimple_nop_p (stmt)
2237 && gimple_code (stmt) != GIMPLE_LABEL)
2238 insns++;
2241 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2243 if (dump_enabled_p ())
2244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2245 "not vectorized: too many instructions in "
2246 "basic block.\n");
2248 return NULL;
2251 /* Autodetect the first vector size we try. */
2252 current_vector_size = 0;
2253 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2255 while (1)
2257 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2258 if (bb_vinfo)
2259 return bb_vinfo;
2261 destroy_bb_vec_info (bb_vinfo);
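2262 /* VECTOR_SIZES is a bitmask of the vector sizes the target supports; clear the bit of the size just tried so the largest remaining size is picked below. */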
2263 vector_sizes &= ~current_vector_size;
2264 if (vector_sizes == 0
2265 || current_vector_size == 0)
2266 return NULL;
2268 /* Try the next biggest vector size. */
2269 current_vector_size = 1 << floor_log2 (vector_sizes);
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_NOTE, vect_location,
2272 "***** Re-trying analysis with "
2273 "vector size %d\n", current_vector_size);
2278 /* SLP costs are calculated according to the SLP instance unrolling factor
2279 (i.e., the number of created vector stmts depends on the unrolling factor).
2280 However, the actual number of vector stmts for every SLP node depends on
2281 the VF, which is set later in vect_analyze_operations (). Hence, the SLP
2282 costs should be updated. In this function we assume that the inside costs
2283 calculated in vect_model_xxx_cost are linear in ncopies. */
2285 void
2286 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2288 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2289 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2290 slp_instance instance;
2291 stmt_vector_for_cost body_cost_vec;
2292 stmt_info_for_cost *si;
2293 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_NOTE, vect_location,
2297 "=== vect_update_slp_costs_according_to_vf ===");
2299 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2301 /* We assume that costs are linear in ncopies. */
2302 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
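2303 /* E.g., VF == 8 and an instance unrolling factor of 2 give NCOPIES == 4, so each cost entry below is recorded with four times its count. */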
2304 /* Record the instance's instructions in the target cost model.
2305 This was delayed until here because the count of instructions
2306 isn't known beforehand. */
2307 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2309 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, j, si)
2310 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2311 vinfo_for_stmt (si->stmt), si->misalign,
2312 vect_body);
2317 /* For constant and loop invariant defs of SLP_NODE this function returns
2318 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2319 OP_NUM determines which operand of the RHS of the scalar stmts we gather
2320 defs for. NUMBER_OF_VECTORS is the number of vector defs to create.
2321 REDUC_INDEX is the index of the reduction operand in the statements, or -1
2322 if there is none. */
2324 static void
2325 vect_get_constant_vectors (tree op, slp_tree slp_node,
2326 VEC (tree, heap) **vec_oprnds,
2327 unsigned int op_num, unsigned int number_of_vectors,
2328 int reduc_index)
2330 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2331 gimple stmt = VEC_index (gimple, stmts, 0);
2332 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2333 unsigned nunits;
2334 tree vec_cst;
2335 tree *elts;
2336 unsigned j, number_of_places_left_in_vector;
2337 tree vector_type;
2338 tree vop;
2339 int group_size = VEC_length (gimple, stmts);
2340 unsigned int vec_num, i;
2341 unsigned number_of_copies = 1;
2342 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2343 bool constant_p, is_store;
2344 tree neutral_op = NULL, neutral_vec = NULL;
2345 enum tree_code code = gimple_expr_code (stmt);
2346 gimple def_stmt;
2347 struct loop *loop;
2348 gimple_seq ctor_seq = NULL;
2350 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2351 && reduc_index != -1)
2353 op_num = reduc_index - 1;
2354 op = gimple_op (stmt, reduc_index);
2355 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2356 we need either neutral operands or the original operands. See
2357 get_initial_def_for_reduction() for details. */
2358 switch (code)
2360 case WIDEN_SUM_EXPR:
2361 case DOT_PROD_EXPR:
2362 case PLUS_EXPR:
2363 case MINUS_EXPR:
2364 case BIT_IOR_EXPR:
2365 case BIT_XOR_EXPR:
2366 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2367 neutral_op = build_real (TREE_TYPE (op), dconst0);
2368 else
2369 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2371 break;
2373 case MULT_EXPR:
2374 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2375 neutral_op = build_real (TREE_TYPE (op), dconst1);
2376 else
2377 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2379 break;
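2380 /* The all-ones value (-1) is the neutral element for AND. */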
2381 case BIT_AND_EXPR:
2382 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2383 break;
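2384 /* MIN/MAX have no neutral element; use the value carried into the loop on the preheader edge instead. */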
2385 case MAX_EXPR:
2386 case MIN_EXPR:
2387 def_stmt = SSA_NAME_DEF_STMT (op);
2388 loop = (gimple_bb (stmt))->loop_father;
2389 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2390 loop_preheader_edge (loop));
2391 break;
2393 default:
2394 neutral_op = NULL;
2398 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2400 is_store = true;
2401 op = gimple_assign_rhs1 (stmt);
2403 else
2404 is_store = false;
2406 gcc_assert (op);
2408 if (CONSTANT_CLASS_P (op))
2409 constant_p = true;
2410 else
2411 constant_p = false;
2413 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2414 gcc_assert (vector_type);
2415 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2417 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2418 created vectors. It is greater than 1 if unrolling is performed.
2420 For example, we have two scalar operands, s1 and s2 (e.g., group of
2421 strided accesses of size two), while NUNITS is four (i.e., four scalars
2422 of this type can be packed in a vector). The output vector will contain
2423 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2424 will be 2).
2426 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2427 containing the operands.
2429 For example, NUNITS is four as before, and the group size is 8
2430 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2431 {s5, s6, s7, s8}. */
2433 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
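2434 /* E.g., NUNITS == 4 and GROUP_SIZE == 2 give lcm (4, 2) / 2 == 2 copies, while GROUP_SIZE == 8 gives lcm (4, 8) / 8 == 1. */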
2435 number_of_places_left_in_vector = nunits;
2436 elts = XALLOCAVEC (tree, nunits);
2437 for (j = 0; j < number_of_copies; j++)
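2438 /* Walk the scalar stmts in reverse so that each vector is filled from its last element down to its first; the finished vectors are inverted back below. */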
2439 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2441 if (is_store)
2442 op = gimple_assign_rhs1 (stmt);
2443 else
2445 switch (code)
2447 case COND_EXPR:
2448 if (op_num == 0 || op_num == 1)
2450 tree cond = gimple_assign_rhs1 (stmt);
2451 op = TREE_OPERAND (cond, op_num);
2453 else
2455 if (op_num == 2)
2456 op = gimple_assign_rhs2 (stmt);
2457 else
2458 op = gimple_assign_rhs3 (stmt);
2460 break;
2462 case CALL_EXPR:
2463 op = gimple_call_arg (stmt, op_num);
2464 break;
2466 case LSHIFT_EXPR:
2467 case RSHIFT_EXPR:
2468 case LROTATE_EXPR:
2469 case RROTATE_EXPR:
2470 op = gimple_op (stmt, op_num + 1);
2471 /* Unlike the other binary operators, shifts/rotates take a
2472 shift count of type int rather than of the same type as
2473 the lhs, so make sure the scalar constant has the right
2474 type when we are dealing with vectors of
2475 long long/long/short/char. */
2476 if (op_num == 1 && constant_p)
2477 op = fold_convert (TREE_TYPE (vector_type), op);
2478 break;
2480 default:
2481 op = gimple_op (stmt, op_num + 1);
2482 break;
2486 if (reduc_index != -1)
2488 loop = (gimple_bb (stmt))->loop_father;
2489 def_stmt = SSA_NAME_DEF_STMT (op);
2491 gcc_assert (loop);
2493 /* Get the def before the loop. In a reduction chain we have only
2494 one initial value. */
2495 if ((j != (number_of_copies - 1)
2496 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2497 && i != 0))
2498 && neutral_op)
2499 op = neutral_op;
2500 else
2501 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2502 loop_preheader_edge (loop));
2505 /* Create 'vect_ = {op0,op1,...,opn}'. */
2506 number_of_places_left_in_vector--;
2507 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2509 if (constant_p)
2511 op = fold_unary (VIEW_CONVERT_EXPR,
2512 TREE_TYPE (vector_type), op);
2513 gcc_assert (op && CONSTANT_CLASS_P (op));
2515 else
2517 tree new_temp
2518 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2519 gimple init_stmt;
2520 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2521 op);
2522 init_stmt
2523 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2524 new_temp, op, NULL_TREE);
2525 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2526 op = new_temp;
2529 elts[number_of_places_left_in_vector] = op;
2531 if (number_of_places_left_in_vector == 0)
2533 number_of_places_left_in_vector = nunits;
2535 if (constant_p)
2536 vec_cst = build_vector (vector_type, elts);
2537 else
2539 VEC (constructor_elt, gc) *v;
2540 unsigned k;
2541 v = VEC_alloc (constructor_elt, gc, nunits);
2542 for (k = 0; k < nunits; ++k)
2543 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2544 vec_cst = build_constructor (vector_type, v);
2546 VEC_quick_push (tree, voprnds,
2547 vect_init_vector (stmt, vec_cst,
2548 vector_type, NULL));
2549 if (ctor_seq != NULL)
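2550 /* Emit the queued conversion stmts right before the vector initializer just created. */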
2551 gimple init_stmt
2552 = SSA_NAME_DEF_STMT (VEC_last (tree, voprnds));
2553 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2554 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2555 GSI_SAME_STMT);
2556 ctor_seq = NULL;
2562 /* Since the vectors are created in reverse order, we must invert
2563 them. */
2564 vec_num = VEC_length (tree, voprnds);
2565 for (j = vec_num; j != 0; j--)
2567 vop = VEC_index (tree, voprnds, j - 1);
2568 VEC_quick_push (tree, *vec_oprnds, vop);
2571 VEC_free (tree, heap, voprnds);
2573 /* If VF is greater than the unrolling factor needed for the SLP group of
2574 stmts, the NUMBER_OF_VECTORS to be created is greater than
2575 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2576 to replicate the vectors. */
2577 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2581 if (neutral_op)
2583 if (!neutral_vec)
2584 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2586 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2588 else
2590 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2591 VEC_quick_push (tree, *vec_oprnds, vop);
2597 /* Get vectorized definitions from SLP_NODE that contains corresponding
2598 vectorized def-stmts. */
2600 static void
2601 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2603 tree vec_oprnd;
2604 gimple vec_def_stmt;
2605 unsigned int i;
2607 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2609 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2611 gcc_assert (vec_def_stmt);
2612 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2613 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2618 /* Get vectorized definitions for SLP_NODE.
2619 If the scalar definitions are loop invariants or constants, collect them and
2620 call vect_get_constant_vectors() to create vector stmts.
2621 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2622 must be stored in the corresponding child of SLP_NODE, and we call
2623 vect_get_slp_vect_defs () to retrieve them. */
2625 void
2626 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2627 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2629 gimple first_stmt, first_def;
2630 int number_of_vects = 0, i;
2631 unsigned int child_index = 0;
2632 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2633 slp_tree child = NULL;
2634 VEC (tree, heap) *vec_defs;
2635 tree oprnd, def_lhs;
2636 bool vectorized_defs;
2638 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2639 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2641 /* For each operand we check if it has vectorized definitions in a child
2642 node or we need to create them (for invariants and constants). We
2643 check if the LHS of the first stmt of the next child matches OPRND.
2644 If it does, we found the correct child. Otherwise, we call
2645 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2646 this child node is checked again for the next operand. */
2647 vectorized_defs = false;
2648 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2650 child = (slp_tree) VEC_index (slp_void_p,
2651 SLP_TREE_CHILDREN (slp_node),
2652 child_index);
2653 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2655 /* At the end of a pattern sequence we have a use of the original stmt,
2656 so we need to compare OPRND with the original def. */
2657 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2658 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2659 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2660 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2662 if (is_gimple_call (first_def))
2663 def_lhs = gimple_call_lhs (first_def);
2664 else
2665 def_lhs = gimple_assign_lhs (first_def);
2667 if (operand_equal_p (oprnd, def_lhs, 0))
2669 /* The number of vector defs is determined by the number of
2670 vector statements in the node from which we get those
2671 statements. */
2672 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2673 vectorized_defs = true;
2674 child_index++;
2678 if (!vectorized_defs)
2680 if (i == 0)
2682 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2683 /* Number of vector stmts was calculated according to LHS in
2684 vect_schedule_slp_instance (), fix it by replacing LHS with
2685 RHS, if necessary. See vect_get_smallest_scalar_type () for
2686 details. */
2687 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2688 &rhs_size_unit);
2689 if (rhs_size_unit != lhs_size_unit)
2691 number_of_vects *= rhs_size_unit;
2692 number_of_vects /= lhs_size_unit;
2697 /* Allocate memory for vectorized defs. */
2698 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2700 /* For reduction defs we call vect_get_constant_vectors (), since we are
2701 looking for initial loop invariant values. */
2702 if (vectorized_defs && reduc_index == -1)
2703 /* The defs are already vectorized. */
2704 vect_get_slp_vect_defs (child, &vec_defs);
2705 else
2706 /* Build vectors from scalar defs. */
2707 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2708 number_of_vects, reduc_index);
2710 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2712 /* For reductions, we only need initial values. */
2713 if (reduc_index != -1)
2714 return;
2719 /* Create NCOPIES permutation statements using the vector mask MASK and two
2720 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for
2721 the first copy, shifting by STRIDE elements of DR_CHAIN for every further
2722 copy (STRIDE is the number of vectorized stmts for NODE divided by the
2723 number of copies).
2725 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE at
2726 which the created stmts must be inserted. */
2728 static inline void
2729 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2730 tree mask, int first_vec_indx, int second_vec_indx,
2731 gimple_stmt_iterator *gsi, slp_tree node,
2732 tree vectype, VEC (tree, heap) *dr_chain,
2733 int ncopies, int vect_stmts_counter)
2735 tree perm_dest;
2736 gimple perm_stmt = NULL;
2737 stmt_vec_info next_stmt_info;
2738 int i, stride;
2739 tree first_vec, second_vec, data_ref;
2741 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
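2742 /* E.g., 4 vector stmts for NODE and 2 copies give STRIDE == 2, so consecutive copies use input vectors two positions apart in DR_CHAIN. */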
2743 /* Pre-fill the vect stmts of NODE with NULLs so that the generated
2744 stmts can be stored at the proper positions later. */
2745 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2746 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2747 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2749 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2750 for (i = 0; i < ncopies; i++)
2752 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2753 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2755 /* Generate the permute statement. */
2756 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2757 first_vec, second_vec, mask);
2758 data_ref = make_ssa_name (perm_dest, perm_stmt);
2759 gimple_set_lhs (perm_stmt, data_ref);
2760 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2762 /* Store the vector statement in NODE. */
2763 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2764 stride * i + vect_stmts_counter, perm_stmt);
2766 first_vec_indx += stride;
2767 second_vec_indx += stride;
2770 /* Mark the scalar stmt as vectorized. */
2771 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2772 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2776 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
2777 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
2778 representation. Check that the mask is valid and return FALSE if not.
2779 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2780 the next vector, i.e., the current first vector is not needed. */
2782 static bool
2783 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2784 int mask_nunits, bool only_one_vec, int index,
2785 unsigned char *mask, int *current_mask_element,
2786 bool *need_next_vector, int *number_of_mask_fixes,
2787 bool *mask_fixed, bool *needs_first_vector)
2789 int i;
2791 /* Convert to target specific representation. */
2792 *current_mask_element = first_mask_element + m;
2793 /* Adjust the value in case it's a mask for second and third vectors. */
2794 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2796 if (*current_mask_element < mask_nunits)
2797 *needs_first_vector = true;
2799 /* We have only one input vector to permute but the mask accesses values in
2800 the next vector as well. */
2801 if (only_one_vec && *current_mask_element >= mask_nunits)
2803 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2806 "permutation requires at least two vectors ");
2807 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2810 return false;
2813 /* The mask requires the next vector. */
2814 if (*current_mask_element >= mask_nunits * 2)
2816 if (*needs_first_vector || *mask_fixed)
2818 /* We either need the first vector too or have already moved to the
2819 next vector. In both cases, this permutation needs three
2820 vectors. */
2821 if (dump_enabled_p ())
2823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2824 "permutation requires at "
2825 "least three vectors ");
2826 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2829 return false;
2832 /* We move to the next vector, dropping the first one and working with
2833 the second and the third - we need to adjust the values of the mask
2834 accordingly. */
2835 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2837 for (i = 0; i < index; i++)
2838 mask[i] -= mask_nunits * *number_of_mask_fixes;
2840 (*number_of_mask_fixes)++;
2841 *mask_fixed = true;
2844 *need_next_vector = *mask_fixed;
2846 /* This was the last element of this mask. Start a new one. */
2847 if (index == mask_nunits - 1)
2849 *number_of_mask_fixes = 1;
2850 *mask_fixed = false;
2851 *needs_first_vector = false;
2854 return true;
2858 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2859 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2860 permute statements for SLP_NODE_INSTANCE. */
2861 bool
2862 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2863 gimple_stmt_iterator *gsi, int vf,
2864 slp_instance slp_node_instance, bool analyze_only)
2866 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2867 tree mask_element_type = NULL_TREE, mask_type;
2868 int i, j, k, nunits, vec_index = 0, scalar_index;
2869 slp_tree node;
2870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2871 gimple next_scalar_stmt;
2872 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2873 int first_mask_element;
2874 int index, unroll_factor, current_mask_element, ncopies;
2875 unsigned char *mask;
2876 bool only_one_vec = false, need_next_vector = false;
2877 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2878 int number_of_mask_fixes = 1;
2879 bool mask_fixed = false;
2880 bool needs_first_vector = false;
2881 enum machine_mode mode;
2883 mode = TYPE_MODE (vectype);
2885 if (!can_vec_perm_p (mode, false, NULL))
2887 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2890 "no vect permute for ");
2891 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2893 return false;
2896 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2897 same size as the vector element being permuted. */
2898 mask_element_type = lang_hooks.types.type_for_mode
2899 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2900 mask_type = get_vectype_for_scalar_type (mask_element_type);
2901 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2902 mask = XALLOCAVEC (unsigned char, nunits);
2903 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2905 /* The number of vector stmts to generate based only on the SLP_NODE_INSTANCE
2906 unrolling factor. */
2907 orig_vec_stmts_num = group_size *
2908 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2909 if (orig_vec_stmts_num == 1)
2910 only_one_vec = true;
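2911 /* E.g., GROUP_SIZE == 3, an unrolling factor of 4 and NUNITS == 4 (the example below) give ORIG_VEC_STMTS_NUM == 3. */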
2912 /* The number of copies is determined by the final vectorization factor
2913 relative to the SLP_NODE_INSTANCE unrolling factor. */
2914 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2916 /* Generate permutation masks for every NODE. The number of masks for each
2917 NODE is equal to GROUP_SIZE.
2918 E.g., we have a group of three nodes with three loads from the same
2919 location in each node, and the vector size is 4. I.e., we have an
2920 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2921 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2922 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2925 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2926 The last mask is illegal since we assume two input operands for the
2927 permute operation, so the mask element values cannot exceed 2 * NUNITS - 1.
2928 Hence, the last mask must be converted into {2,5,5,5}.
2929 For the first two permutations we need the first and the second input
2930 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2931 we need the second and the third vectors: {b1,c1,a2,b2} and
2932 {c2,a3,b3,c3}. */
2934 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2936 scalar_index = 0;
2937 index = 0;
2938 vect_stmts_counter = 0;
2939 vec_index = 0;
2940 first_vec_index = vec_index++;
2941 if (only_one_vec)
2942 second_vec_index = first_vec_index;
2943 else
2944 second_vec_index = vec_index++;
2946 for (j = 0; j < unroll_factor; j++)
2948 for (k = 0; k < group_size; k++)
2950 first_mask_element = i + j * group_size;
2951 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2952 nunits, only_one_vec, index,
2953 mask, &current_mask_element,
2954 &need_next_vector,
2955 &number_of_mask_fixes, &mask_fixed,
2956 &needs_first_vector))
2957 return false;
2958 mask[index++] = current_mask_element;
2960 if (index == nunits)
2962 tree mask_vec, *mask_elts;
2963 int l;
2965 if (!can_vec_perm_p (mode, false, mask))
2967 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2970 vect_location,
2971 "unsupported vect permute { ");
2972 for (i = 0; i < nunits; ++i)
2973 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2974 mask[i]);
2975 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2977 return false;
2980 mask_elts = XALLOCAVEC (tree, nunits);
2981 for (l = 0; l < nunits; ++l)
2982 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2983 mask_vec = build_vector (mask_type, mask_elts);
2984 index = 0;
2986 if (!analyze_only)
2988 if (need_next_vector)
2990 first_vec_index = second_vec_index;
2991 second_vec_index = vec_index;
2994 next_scalar_stmt = VEC_index (gimple,
2995 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2997 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2998 mask_vec, first_vec_index, second_vec_index,
2999 gsi, node, vectype, dr_chain,
3000 ncopies, vect_stmts_counter++);
3007 return true;
3012 /* Vectorize the SLP instance tree in postorder. */
3014 static bool
3015 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3016 unsigned int vectorization_factor)
3018 gimple stmt;
3019 bool grouped_store, is_store;
3020 gimple_stmt_iterator si;
3021 stmt_vec_info stmt_info;
3022 unsigned int vec_stmts_size, nunits, group_size;
3023 tree vectype;
3024 int i;
3025 slp_tree loads_node;
3026 slp_void_p child;
3028 if (!node)
3029 return false;
3031 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
3032 vect_schedule_slp_instance ((slp_tree) child, instance,
3033 vectorization_factor);
3035 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
3036 stmt_info = vinfo_for_stmt (stmt);
3038 /* VECTYPE is the type of the destination. */
3039 vectype = STMT_VINFO_VECTYPE (stmt_info);
3040 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3041 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3043 /* For each SLP instance calculate the number of vector stmts to be created
3044 for the scalar stmts in each node of the SLP tree. The number of vector
3045 elements in one vector iteration is the number of scalar elements in
3046 one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
3047 vector size. */
3048 vec_stmts_size = (vectorization_factor * group_size) / nunits;
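3049 /* E.g., VF == 4, GROUP_SIZE == 4 and NUNITS == 8 give (4 * 4) / 8 == 2 vector stmts for each node. */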
3050 /* In case of load permutation we have to allocate vectorized statements for
3051 all the nodes that participate in that permutation. */
3052 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
3054 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
3056 if (!SLP_TREE_VEC_STMTS (loads_node))
3058 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
3059 vec_stmts_size);
3060 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3065 if (!SLP_TREE_VEC_STMTS (node))
3067 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
3068 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3071 if (dump_enabled_p ())
3073 dump_printf_loc (MSG_NOTE, vect_location,
3074 "------>vectorizing SLP node starting from: ");
3075 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3078 /* Loads should be inserted before the first load. */
3079 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3080 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3081 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3082 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
3083 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3084 else if (is_pattern_stmt_p (stmt_info))
3085 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3086 else
3087 si = gsi_for_stmt (stmt);
3089 /* Stores should be inserted just before the last store. */
3090 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3091 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3093 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3094 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3095 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3096 si = gsi_for_stmt (last_store);
3099 /* Mark the first element of the reduction chain as a reduction to properly
3100 transform the node. In the analysis phase only the last element of the
3101 chain is marked as a reduction. */
3102 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3103 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3105 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3106 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3109 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3110 return is_store;
3113 /* Replace the scalar calls in SLP node NODE with assignments setting their
3114 lhs to zero. For loop vectorization this is done in vectorizable_call, but
3115 for SLP it needs to be deferred until the end of vect_schedule_slp, because
3116 multiple SLP instances may refer to the same scalar stmt. */
3118 static void
3119 vect_remove_slp_scalar_calls (slp_tree node)
3121 gimple stmt, new_stmt;
3122 gimple_stmt_iterator gsi;
3123 int i;
3124 slp_void_p child;
3125 tree lhs;
3126 stmt_vec_info stmt_info;
3128 if (!node)
3129 return;
3131 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
3132 vect_remove_slp_scalar_calls ((slp_tree) child);
3134 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
3136 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3137 continue;
3138 stmt_info = vinfo_for_stmt (stmt);
3139 if (stmt_info == NULL
3140 || is_pattern_stmt_p (stmt_info)
3141 || !PURE_SLP_STMT (stmt_info))
3142 continue;
3143 lhs = gimple_call_lhs (stmt);
3144 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3145 set_vinfo_for_stmt (new_stmt, stmt_info);
3146 set_vinfo_for_stmt (stmt, NULL);
3147 STMT_VINFO_STMT (stmt_info) = new_stmt;
3148 gsi = gsi_for_stmt (stmt);
3149 gsi_replace (&gsi, new_stmt, false);
3150 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3154 /* Generate vector code for all SLP instances in the loop/basic block. */
3156 bool
3157 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3159 VEC (slp_instance, heap) *slp_instances;
3160 slp_instance instance;
3161 unsigned int i, vf;
3162 bool is_store = false;
3164 if (loop_vinfo)
3166 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3167 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3169 else
3171 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3172 vf = 1;
3175 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3177 /* Schedule the tree of INSTANCE. */
3178 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3179 instance, vf);
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE, vect_location,
3182 "vectorizing stmts using SLP.");
3185 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
3187 slp_tree root = SLP_INSTANCE_TREE (instance);
3188 gimple store;
3189 unsigned int j;
3190 gimple_stmt_iterator gsi;
3192 vect_remove_slp_scalar_calls (root);
3194 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
3195 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3197 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3198 break;
3200 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3201 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3202 /* Free the attached stmt_vec_info and remove the stmt. */
3203 gsi = gsi_for_stmt (store);
3204 unlink_stmt_vdef (store);
3205 gsi_remove (&gsi, true);
3206 release_defs (store);
3207 free_stmt_vec_info (store);
3211 return is_store;
3215 /* Vectorize the basic block. */
3217 void
3218 vect_slp_transform_bb (basic_block bb)
3220 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3221 gimple_stmt_iterator si;
3223 gcc_assert (bb_vinfo);
3225 if (dump_enabled_p ())
3226 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3228 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3230 gimple stmt = gsi_stmt (si);
3231 stmt_vec_info stmt_info;
3233 if (dump_enabled_p ())
3235 dump_printf_loc (MSG_NOTE, vect_location,
3236 "------>SLPing statement: ");
3237 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3240 stmt_info = vinfo_for_stmt (stmt);
3241 gcc_assert (stmt_info);
3243 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3244 if (STMT_SLP_TYPE (stmt_info))
3246 vect_schedule_slp (NULL, bb_vinfo);
3247 break;
3251 if (dump_enabled_p ())
3252 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3254 destroy_bb_vec_info (bb_vinfo);