gcc/tree-vect-slp.c

/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement that has one, and
   UNKNOWN_LOC otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);

  return node;
}
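
/* Illustrative note (added comment, not from the original sources): for
   x_1 = a_2 < b_3 ? c_4 : d_5, gimple_num_ops is 4, so NOPS starts at 3
   and is incremented to 4 because the comparison embedded in the COND_EXPR
   contributes two operands (a_2 and b_3) that are analyzed separately.  */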

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             vec<slp_oprnd_info> *oprnds_info,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *body_cost_vec)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node, prologue_cost_vec,
                                       body_cost_vec);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          prologue_cost_vec, body_cost_vec);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
               || (!def
                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                           TREE_TYPE (oprnd)))
               || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = (*oprnds_info)[0];
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "Swapping operands of ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = (*oprnds_info)[0];
              oprnd1_info = (*oprnds_info)[1];
              if (i == 0)
                oprnd1_info->def_stmts.quick_push (def_stmt);
              else
                oprnd0_info->def_stmts.quick_push (def_stmt);
            }
          else
            oprnd_info->def_stmts.quick_push (def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}
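
/* Illustrative example (added comment, not from the original sources):
   given the commutative group

     a[0] = b[0] + c[0];
     a[1] = c[1] + b[1];

   the defs of the second statement do not match operand-wise, so the code
   above swaps its rhs operands into b[1] + c[1], making operand 0 of both
   statements a load from B and operand 1 a load from C, which keeps the
   def-stmts of each operand isomorphic.  */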

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if the def-stmts are not isomorphic, require a
   data permutation, or use unsupported operations.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     vec<int> *load_permutation,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor, bool *loads_permuted,
                     stmt_vector_for_cost *prologue_cost_vec,
                     stmt_vector_for_cost *body_cost_vec)
{
  unsigned int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  vec<slp_oprnd_info> oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info,
                                                prologue_cost_vec,
                                                body_cost_vec))
                {
                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((ncopies > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node,
                                        prologue_cost_vec, body_cost_vec);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              load_permutation->safe_push (load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info, prologue_cost_vec,
                                            body_cost_vec))
            {
              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      loads->safe_push (*node);
      if (permutation)
        {
          gimple first_stmt = stmts[0];
          *loads_permuted = true;
          (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                   vinfo_for_stmt (first_stmt), 0, vect_body);
        }
      else
        {
          /* We don't check complex-number chains here, so we set
             LOADS_PERMUTED for further checking in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted,
                                   prologue_cost_vec, body_cost_vec))
        {
          if (child)
            oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      oprnd_info->def_stmts.create (0);
      SLP_TREE_CHILDREN (*node).quick_push (child);
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}
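
/* Illustrative example (added comment, not from the original sources):
   for the grouped stores

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] + c[3];

   the caller creates a root node with the four stores, and
   vect_build_slp_tree recurses into one child node holding the four
   isomorphic '+' statements.  The two groups of loads from B and C stop
   the recursion and are pushed to LOADS, each with load permutation
   {0, 1, 2, 3}, i.e. no permutation is needed.  */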

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, (slp_tree) child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  vec<slp_tree> sorted_loads = vNULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
       && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
       i += group_size, j++)
    {
      gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data "
                               "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                scalar_stmt, 0);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads.create (group_size);
  for (i = 0; i < group_size; i++)
    sorted_loads.safe_push (tmp_loads[i]);

  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
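
/* Illustrative example (added comment, not from the original sources):
   with GROUP_SIZE 2, a load permutation {1, 1, 0, 0}, and the node reading
   b[1] appearing first in SLP_INSTANCE_LOADS, the loop above places the
   node for b[0] at tmp_loads[0] and the node for b[1] at tmp_loads[1], so
   SLP_INSTANCE_LOADS ends up sorted by position in the interleaving
   chain.  */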

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<int> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);

  for (i = 0; i < group_size; i++)
    tmp_stmts.safe_push (NULL);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = permutation[i];
      tmp_stmts[index] = stmt;
    }

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
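
/* Illustrative example (added comment, not from the original sources):
   with GROUP_SIZE 4 and PERMUTATION {2, 0, 3, 1}, statement I of every
   node is moved to position PERMUTATION[I], so {s0, s1, s2, s3} becomes
   {s1, s3, s0, s2}.  */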

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   vec<int> load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (load_permutation, i, next)
        dump_printf (MSG_NOTE, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
        return false;

      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as follows:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
                      other_node_first =
                        SLP_TREE_SCALAR_STMTS (other_complex_node)[0];

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is OK, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
    {
      SLP_INSTANCE_LOADS (slp_instn).release ();
      SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = load_permutation.length () / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = load_permutation[j];
              first_group_load_index = load_permutation[k];

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          bitmap_clear (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = load_permutation[k];
              if (bitmap_bit_p (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              bitmap_set_bit (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!bitmap_bit_p (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = SLP_TREE_SCALAR_STMTS (node)[0];
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "unsupported unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            first_load, 0);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (load_permutation.length ()
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           load_permutation.iterate (i, &next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (bitmap_bit_p (load_index, prev))
        {
          supported = false;
          break;
        }

      bitmap_set_bit (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!bitmap_bit_p (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}
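
/* Illustrative example (added comment, not from the original sources):
   for GROUP_SIZE 2 the fall-through check above accepts a permutation of
   length 4 such as {0, 0, 1, 1}, which arises when both statements of the
   group read the same two locations, e.g.

     a[0] = b[0] + b[1];
     a[1] = b[0] + b[1];

   one load node then contains {b[0], b[0]} and the other {b[1], b[1]}.  */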

/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  vec<int> load_permutation;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  vec<gimple> scalar_stmts;
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  stmt_info_for_cost *si;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation.create (group_size * group_size);
  loads.create (group_size);
  prologue_cost_vec.create (10);
  body_cost_vec.create (10);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted,
                           &prologue_cost_vec, &body_cost_vec))
    {
      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                    : BB_VINFO_TARGET_COST_DATA (bb_vinfo));

      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP");
          vect_free_slp_tree (node);
          body_cost_vec.release ();
          prologue_cost_vec.release ();
          load_permutation.release ();
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_slp_instance (new_instance);
              prologue_cost_vec.release ();
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();

      /* Record the prologue costs, which were delayed until we were
         sure that SLP was successful.  Unlike the body costs, we know
         the final values now regardless of the loop vectorization factor.  */
      FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
        {
          struct _stmt_vec_info *stmt_info
            = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
          (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                si->misalign, vect_prologue);
        }

      prologue_cost_vec.release ();

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }
  else
    {
      body_cost_vec.release ();
      prologue_cost_vec.release ();
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  load_permutation.release ();
  loads.release ();

  return false;
}
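
/* Illustrative example (added comment, not from the original sources):
   for a group of 6 stores and a vector type with NUNITS 4, the unrolling
   factor is least_common_multiple (4, 6) / 6 = 2, and ncopies_for_cost is
   2 * 6 / 4 = 3 vector statements.  */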

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if a reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "Decided to SLP %d instances. Unrolling factor %d",
                     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = stmts[0];
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_void_p child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}
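
/* Illustrative example (added comment, not from the original sources):
   if t_1 = b[i] + c[i] is PURE_SLP because it feeds a group of SLPed
   stores, but t_1 is also used by a loop-vectorized (non-SLP) statement
   such as a reduction sum_2 = sum_3 + t_1, then t_1 must be computed by
   both the SLP and the loop-based vector code, so it is re-marked HYBRID
   above.  */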

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
      return false;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          slp_instances.ordered_remove (i);
        }
      else
        i++;
    }

  if (!slp_instances.length ())
    return false;

  return true;
}
2005 /* Check if vectorization of the basic block is profitable. */
2007 static bool
2008 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2010 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2011 slp_instance instance;
2012 int i, j;
2013 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2014 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2015 unsigned int stmt_cost;
2016 gimple stmt;
2017 gimple_stmt_iterator si;
2018 basic_block bb = BB_VINFO_BB (bb_vinfo);
2019 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2020 stmt_vec_info stmt_info = NULL;
2021 stmt_vector_for_cost body_cost_vec;
2022 stmt_info_for_cost *ci;
2024 /* Calculate vector costs. */
2025 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2027 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2029 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2031 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2032 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2033 stmt_info, ci->misalign, vect_body);
2037 /* Calculate scalar cost. */
2038 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2040 stmt = gsi_stmt (si);
2041 stmt_info = vinfo_for_stmt (stmt);
2043 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2044 || !PURE_SLP_STMT (stmt_info))
2045 continue;
2047 if (STMT_VINFO_DATA_REF (stmt_info))
2049 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2050 stmt_cost = vect_get_stmt_cost (scalar_load);
2051 else
2052 stmt_cost = vect_get_stmt_cost (scalar_store);
2054 else
2055 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2057 scalar_cost += stmt_cost;
2060 /* Complete the target-specific cost calculation. */
2061 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2062 &vec_inside_cost, &vec_epilogue_cost);
2064 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2066 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2069 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2070 vec_inside_cost);
2071 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2072 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2073 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d", scalar_cost);
2076 /* Vectorization is profitable if its cost is less than the cost of the
2077 scalar version. */
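/* E.g. (illustrative numbers only, not taken from the code): an inside
cost of 6 plus a prologue and an epilogue cost of 1 each gives a vector
cost of 8; a scalar cost of 8 would fail the test below, since the
vector cost must be strictly smaller. */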
2078 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2079 return false;
2081 return true;
2084 /* Check if the basic block can be vectorized. */
2086 static bb_vec_info
2087 vect_slp_analyze_bb_1 (basic_block bb)
2089 bb_vec_info bb_vinfo;
2090 vec<slp_instance> slp_instances;
2091 slp_instance instance;
2092 int i;
2093 int min_vf = 2;
2095 bb_vinfo = new_bb_vec_info (bb);
2096 if (!bb_vinfo)
2097 return NULL;
2099 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2103 "not vectorized: unhandled data-ref in basic "
2104 "block.\n");
2106 destroy_bb_vec_info (bb_vinfo);
2107 return NULL;
2110 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2112 if (dump_enabled_p ())
2113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2114 "not vectorized: not enough data-refs in "
2115 "basic block.\n");
2117 destroy_bb_vec_info (bb_vinfo);
2118 return NULL;
2121 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2125 "not vectorized: unhandled data access in "
2126 "basic block.\n");
2128 destroy_bb_vec_info (bb_vinfo);
2129 return NULL;
2132 vect_pattern_recog (NULL, bb_vinfo);
2134 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2136 if (dump_enabled_p ())
2137 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2138 "not vectorized: unhandled data dependence "
2139 "in basic block.\n");
2141 destroy_bb_vec_info (bb_vinfo);
2142 return NULL;
2145 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2149 "not vectorized: bad data alignment in basic "
2150 "block.\n");
2152 destroy_bb_vec_info (bb_vinfo);
2153 return NULL;
2156 /* Check the SLP opportunities in the basic block, analyze and build SLP
2157 trees. */
2158 if (!vect_analyze_slp (NULL, bb_vinfo))
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2162 "not vectorized: failed to find SLP opportunities "
2163 "in basic block.\n");
2165 destroy_bb_vec_info (bb_vinfo);
2166 return NULL;
2169 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2171 /* Mark all the statements that we want to vectorize as pure SLP and
2172 relevant. */
2173 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2175 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2176 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2179 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "not vectorized: unsupported alignment in basic "
2184 "block.\n");
2185 destroy_bb_vec_info (bb_vinfo);
2186 return NULL;
2189 if (!vect_slp_analyze_operations (bb_vinfo))
2191 if (dump_enabled_p ())
2192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2193 "not vectorized: bad operation in basic block.\n");
2195 destroy_bb_vec_info (bb_vinfo);
2196 return NULL;
2199 /* Cost model: check if the vectorization is worthwhile. */
2200 if (flag_vect_cost_model
2201 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2205 "not vectorized: vectorization is not "
2206 "profitable.\n");
2208 destroy_bb_vec_info (bb_vinfo);
2209 return NULL;
2212 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_NOTE, vect_location,
2214 "Basic block will be vectorized using SLP\n");
2216 return bb_vinfo;
2220 bb_vec_info
2221 vect_slp_analyze_bb (basic_block bb)
2223 bb_vec_info bb_vinfo;
2224 int insns = 0;
2225 gimple_stmt_iterator gsi;
2226 unsigned int vector_sizes;
2228 if (dump_enabled_p ())
2229 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2231 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2233 gimple stmt = gsi_stmt (gsi);
2234 if (!is_gimple_debug (stmt)
2235 && !gimple_nop_p (stmt)
2236 && gimple_code (stmt) != GIMPLE_LABEL)
2237 insns++;
2240 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 "not vectorized: too many instructions in "
2245 "basic block.\n");
2247 return NULL;
2250 /* Autodetect the first vector size we try. */
2251 current_vector_size = 0;
2252 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2254 while (1)
2256 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2257 if (bb_vinfo)
2258 return bb_vinfo;
2260 destroy_bb_vec_info (bb_vinfo);
2262 vector_sizes &= ~current_vector_size;
2263 if (vector_sizes == 0
2264 || current_vector_size == 0)
2265 return NULL;
2267 /* Try the next biggest vector size. */
2268 current_vector_size = 1 << floor_log2 (vector_sizes);
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_NOTE, vect_location,
2271 "***** Re-trying analysis with "
2272 "vector size %d\n", current_vector_size);
2277 /* SLP costs are calculated according to the SLP instance unrolling factor
2278 (i.e., the number of created vector stmts depends on the unrolling factor).
2279 However, the actual number of vector stmts for every SLP node depends on
2280 the VF, which is set later in vect_analyze_operations (). Hence, SLP costs
2281 should be updated. In this function we assume that the inside costs
2282 calculated in vect_model_xxx_cost are linear in ncopies. */
2284 void
2285 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2287 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2288 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2289 slp_instance instance;
2290 stmt_vector_for_cost body_cost_vec;
2291 stmt_info_for_cost *si;
2292 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2294 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_NOTE, vect_location,
2296 "=== vect_update_slp_costs_according_to_vf ===");
2298 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2300 /* We assume that costs are linear in ncopies. */
2301 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
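/* E.g. (made-up numbers): with vf == 8 and an instance unrolling factor
of 2, each cost entry recorded below is counted ncopies == 4 times. */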
2303 /* Record the instance's instructions in the target cost model.
2304 This was delayed until here because the count of instructions
2305 isn't known beforehand. */
2306 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2308 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2309 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2310 vinfo_for_stmt (si->stmt), si->misalign,
2311 vect_body);
2316 /* For constant and loop invariant defs of SLP_NODE this function returns
2317 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2318 OP_NUM determines whether we gather defs for operand 0 or operand 1 of the
2319 RHS of the scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to
2320 create. REDUC_INDEX is the index of the reduction operand in the
2321 statements, or -1 if there is no reduction. */
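/* For instance (a made-up group), for the scalar stmts x0 = a0 + 5 and
x1 = a1 + 5, OP_NUM 1 collects the constants {5, 5}; with NUNITS == 4
this builds the single vector def {5, 5, 5, 5}. */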
2323 static void
2324 vect_get_constant_vectors (tree op, slp_tree slp_node,
2325 vec<tree> *vec_oprnds,
2326 unsigned int op_num, unsigned int number_of_vectors,
2327 int reduc_index)
2329 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2330 gimple stmt = stmts[0];
2331 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2332 unsigned nunits;
2333 tree vec_cst;
2334 tree *elts;
2335 unsigned j, number_of_places_left_in_vector;
2336 tree vector_type;
2337 tree vop;
2338 int group_size = stmts.length ();
2339 unsigned int vec_num, i;
2340 unsigned number_of_copies = 1;
2341 vec<tree> voprnds;
2342 voprnds.create (number_of_vectors);
2343 bool constant_p, is_store;
2344 tree neutral_op = NULL;
2345 enum tree_code code = gimple_expr_code (stmt);
2346 gimple def_stmt;
2347 struct loop *loop;
2348 gimple_seq ctor_seq = NULL;
2350 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2351 && reduc_index != -1)
2353 op_num = reduc_index - 1;
2354 op = gimple_op (stmt, reduc_index);
2355 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2356 we need either neutral operands or the original operands. See
2357 get_initial_def_for_reduction() for details. */
2358 switch (code)
2360 case WIDEN_SUM_EXPR:
2361 case DOT_PROD_EXPR:
2362 case PLUS_EXPR:
2363 case MINUS_EXPR:
2364 case BIT_IOR_EXPR:
2365 case BIT_XOR_EXPR:
2366 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2367 neutral_op = build_real (TREE_TYPE (op), dconst0);
2368 else
2369 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2371 break;
2373 case MULT_EXPR:
2374 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2375 neutral_op = build_real (TREE_TYPE (op), dconst1);
2376 else
2377 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2379 break;
2381 case BIT_AND_EXPR:
2382 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2383 break;
2385 case MAX_EXPR:
2386 case MIN_EXPR:
2387 def_stmt = SSA_NAME_DEF_STMT (op);
2388 loop = (gimple_bb (stmt))->loop_father;
2389 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2390 loop_preheader_edge (loop));
2391 break;
2393 default:
2394 neutral_op = NULL;
2398 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2400 is_store = true;
2401 op = gimple_assign_rhs1 (stmt);
2403 else
2404 is_store = false;
2406 gcc_assert (op);
2408 if (CONSTANT_CLASS_P (op))
2409 constant_p = true;
2410 else
2411 constant_p = false;
2413 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2414 gcc_assert (vector_type);
2415 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2417 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2418 created vectors. It is greater than 1 if unrolling is performed.
2420 For example, we have two scalar operands, s1 and s2 (e.g., a group of
2421 strided accesses of size two), while NUNITS is four (i.e., four scalars
2422 of this type can be packed in a vector). The output vector will contain
2423 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2424 will be 2).
2426 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2427 containing the operands.
2429 For example, NUNITS is four as before, and the group size is 8
2430 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2431 {s5, s6, s7, s8}. */
2433 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
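/* For the examples above: least_common_multiple (4, 2) / 2 == 2 copies,
and least_common_multiple (4, 8) / 8 == 1 copy (in two vectors). */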
2435 number_of_places_left_in_vector = nunits;
2436 elts = XALLOCAVEC (tree, nunits);
2437 for (j = 0; j < number_of_copies; j++)
2439 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2441 if (is_store)
2442 op = gimple_assign_rhs1 (stmt);
2443 else
2445 switch (code)
2447 case COND_EXPR:
2448 if (op_num == 0 || op_num == 1)
2450 tree cond = gimple_assign_rhs1 (stmt);
2451 op = TREE_OPERAND (cond, op_num);
2453 else
2455 if (op_num == 2)
2456 op = gimple_assign_rhs2 (stmt);
2457 else
2458 op = gimple_assign_rhs3 (stmt);
2460 break;
2462 case CALL_EXPR:
2463 op = gimple_call_arg (stmt, op_num);
2464 break;
2466 case LSHIFT_EXPR:
2467 case RSHIFT_EXPR:
2468 case LROTATE_EXPR:
2469 case RROTATE_EXPR:
2470 op = gimple_op (stmt, op_num + 1);
2471 /* Unlike the other binary operators, shifts/rotates take a
2472 shift count of type int instead of the same type as the
2473 lhs, so make sure the scalar is of the right type if we
2474 are dealing with vectors of
2475 long long/long/short/char. */
2476 if (op_num == 1 && constant_p)
2477 op = fold_convert (TREE_TYPE (vector_type), op);
2478 break;
2480 default:
2481 op = gimple_op (stmt, op_num + 1);
2482 break;
2486 if (reduc_index != -1)
2488 loop = (gimple_bb (stmt))->loop_father;
2489 def_stmt = SSA_NAME_DEF_STMT (op);
2491 gcc_assert (loop);
2493 /* Get the def before the loop. In a reduction chain we have only
2494 one initial value. */
2495 if ((j != (number_of_copies - 1)
2496 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2497 && i != 0))
2498 && neutral_op)
2499 op = neutral_op;
2500 else
2501 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2502 loop_preheader_edge (loop));
2505 /* Create 'vect_ = {op0,op1,...,opn}'. */
2506 number_of_places_left_in_vector--;
2507 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2509 if (constant_p)
2511 op = fold_unary (VIEW_CONVERT_EXPR,
2512 TREE_TYPE (vector_type), op);
2513 gcc_assert (op && CONSTANT_CLASS_P (op));
2515 else
2517 tree new_temp
2518 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2519 gimple init_stmt;
2520 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2521 op);
2522 init_stmt
2523 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2524 new_temp, op, NULL_TREE);
2525 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2526 op = new_temp;
2529 elts[number_of_places_left_in_vector] = op;
2531 if (number_of_places_left_in_vector == 0)
2533 number_of_places_left_in_vector = nunits;
2535 if (constant_p)
2536 vec_cst = build_vector (vector_type, elts);
2537 else
2539 vec<constructor_elt, va_gc> *v;
2540 unsigned k;
2541 vec_alloc (v, nunits);
2542 for (k = 0; k < nunits; ++k)
2543 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2544 vec_cst = build_constructor (vector_type, v);
2546 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2547 vector_type, NULL));
2548 if (ctor_seq != NULL)
2550 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2551 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2552 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2553 GSI_SAME_STMT);
2554 ctor_seq = NULL;
2560 /* Since the vectors are created in reverse order, we have to
2561 reverse them here. */
2562 vec_num = voprnds.length ();
2563 for (j = vec_num; j != 0; j--)
2565 vop = voprnds[j - 1];
2566 vec_oprnds->quick_push (vop);
2569 voprnds.release ();
2571 /* If the VF is greater than the unrolling factor needed for the SLP
2572 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2573 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, so we have
2574 to replicate the vectors. */
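/* E.g., if two vectors {s1,s2,s3,s4} {s5,s6,s7,s8} were created but four
are required, the same two vectors are pushed again below; for
reductions a vector built from NEUTRAL_OP is used instead. */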
2575 while (number_of_vectors > vec_oprnds->length ())
2577 tree neutral_vec = NULL;
2579 if (neutral_op)
2581 if (!neutral_vec)
2582 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2584 vec_oprnds->quick_push (neutral_vec);
2586 else
2588 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2589 vec_oprnds->quick_push (vop);
2595 /* Get vectorized definitions from SLP_NODE, which contains the
2596 corresponding vectorized def-stmts. */
2598 static void
2599 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2601 tree vec_oprnd;
2602 gimple vec_def_stmt;
2603 unsigned int i;
2605 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2607 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2609 gcc_assert (vec_def_stmt);
2610 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2611 vec_oprnds->quick_push (vec_oprnd);
2616 /* Get vectorized definitions for SLP_NODE.
2617 If the scalar definitions are loop invariants or constants, collect them and
2618 call vect_get_constant_vectors() to create vector stmts.
2619 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2620 must be stored in the corresponding child of SLP_NODE, and we call
2621 vect_get_slp_vect_defs () to retrieve them. */
2623 void
2624 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2625 vec<vec<tree> > *vec_oprnds, int reduc_index)
2627 gimple first_stmt;
2628 int number_of_vects = 0, i;
2629 unsigned int child_index = 0;
2630 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2631 slp_tree child = NULL;
2632 vec<tree> vec_defs;
2633 tree oprnd;
2634 bool vectorized_defs;
2636 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2637 FOR_EACH_VEC_ELT (ops, i, oprnd)
2639 /* For each operand we check if it has vectorized definitions in a child
2640 node or whether we need to create them (for invariants and constants). We
2641 check if the LHS of the first stmt of the next child matches OPRND.
2642 If it does, we found the correct child. Otherwise, we call
2643 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2644 this child node is checked again for the next operand. */
2645 vectorized_defs = false;
2646 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2648 child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2650 /* We have to check both pattern and original def, if available. */
2651 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2652 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2654 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2655 || (related
2656 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2658 /* The number of vector defs is determined by the number of
2659 vector statements in the node from which we get those
2660 statements. */
2661 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2662 vectorized_defs = true;
2663 child_index++;
2667 if (!vectorized_defs)
2669 if (i == 0)
2671 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2672 /* The number of vector stmts was calculated according to the LHS in
2673 vect_schedule_slp_instance (); fix it by replacing LHS with
2674 RHS, if necessary. See vect_get_smallest_scalar_type () for
2675 details. */
2676 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2677 &rhs_size_unit);
2678 if (rhs_size_unit != lhs_size_unit)
2680 number_of_vects *= rhs_size_unit;
2681 number_of_vects /= lhs_size_unit;
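/* E.g. (illustrative), for a long -> int demotion rhs_size_unit == 8 and
lhs_size_unit == 4, so twice as many RHS vector defs are needed. */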
2686 /* Allocate memory for vectorized defs. */
2687 vec_defs = vNULL;
2688 vec_defs.create (number_of_vects);
2690 /* For reduction defs we call vect_get_constant_vectors (), since we are
2691 looking for initial loop invariant values. */
2692 if (vectorized_defs && reduc_index == -1)
2693 /* The defs are already vectorized. */
2694 vect_get_slp_vect_defs (child, &vec_defs);
2695 else
2696 /* Build vectors from scalar defs. */
2697 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2698 number_of_vects, reduc_index);
2700 vec_oprnds->quick_push (vec_defs);
2702 /* For reductions, we only need initial values. */
2703 if (reduc_index != -1)
2704 return;
2709 /* Create NCOPIES permutation statements using the vector mask MASK and two
2710 input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2711 for the first copy, shifting by STRIDE elements of DR_CHAIN for every
2712 subsequent copy.
2713 (STRIDE is the number of vectorized stmts for NODE divided by the number
2714 of copies.)
2715 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2716 at which the created stmts must be stored. */
2718 static inline void
2719 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2720 tree mask, int first_vec_indx, int second_vec_indx,
2721 gimple_stmt_iterator *gsi, slp_tree node,
2722 tree vectype, vec<tree> dr_chain,
2723 int ncopies, int vect_stmts_counter)
2725 tree perm_dest;
2726 gimple perm_stmt = NULL;
2727 stmt_vec_info next_stmt_info;
2728 int i, stride;
2729 tree first_vec, second_vec, data_ref;
2731 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
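/* E.g. (made-up numbers): with 6 vector stmts and NCOPIES == 2 the
stride is 3, and copy I of the permutation is stored below in slot
3 * I + VECT_STMTS_COUNTER. */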
2733 /* Initialize the vect stmts of NODE to properly insert the generated
2734 stmts later. */
2735 for (i = SLP_TREE_VEC_STMTS (node).length ();
2736 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2737 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2739 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2740 for (i = 0; i < ncopies; i++)
2742 first_vec = dr_chain[first_vec_indx];
2743 second_vec = dr_chain[second_vec_indx];
2745 /* Generate the permute statement. */
2746 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2747 first_vec, second_vec, mask);
2748 data_ref = make_ssa_name (perm_dest, perm_stmt);
2749 gimple_set_lhs (perm_stmt, data_ref);
2750 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2752 /* Store the vector statement in NODE. */
2753 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2755 first_vec_indx += stride;
2756 second_vec_indx += stride;
2759 /* Mark the scalar stmt as vectorized. */
2760 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2761 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2765 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2766 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2767 representation. Check that the mask is valid and return FALSE if not.
2768 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2769 the next vector, i.e., the current first vector is not needed. */
2771 static bool
2772 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2773 int mask_nunits, bool only_one_vec, int index,
2774 unsigned char *mask, int *current_mask_element,
2775 bool *need_next_vector, int *number_of_mask_fixes,
2776 bool *mask_fixed, bool *needs_first_vector)
2778 int i;
2780 /* Convert to target specific representation. */
2781 *current_mask_element = first_mask_element + m;
2782 /* Adjust the value in case it's a mask for the second and third vectors. */
2783 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2785 if (*current_mask_element < mask_nunits)
2786 *needs_first_vector = true;
2788 /* We have only one input vector to permute but the mask accesses values in
2789 the next vector as well. */
2790 if (only_one_vec && *current_mask_element >= mask_nunits)
2792 if (dump_enabled_p ())
2794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2795 "permutation requires at least two vectors ");
2796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2799 return false;
2802 /* The mask requires the next vector. */
2803 if (*current_mask_element >= mask_nunits * 2)
2805 if (*needs_first_vector || *mask_fixed)
2807 /* We either need the first vector too or have already moved to the
2808 next vector. In both cases, this permutation needs three
2809 vectors. */
2810 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2813 "permutation requires at "
2814 "least three vectors ");
2815 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2818 return false;
2821 /* We move to the next vector, dropping the first one and working with
2822 the second and the third, so we need to adjust the values of the mask
2823 accordingly. */
2824 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2826 for (i = 0; i < index; i++)
2827 mask[i] -= mask_nunits * *number_of_mask_fixes;
2829 (*number_of_mask_fixes)++;
2830 *mask_fixed = true;
2833 *need_next_vector = *mask_fixed;
2835 /* This was the last element of this mask. Start a new one. */
2836 if (index == mask_nunits - 1)
2838 *number_of_mask_fixes = 1;
2839 *mask_fixed = false;
2840 *needs_first_vector = false;
2843 return true;
2847 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2848 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2849 permute statements for SLP_NODE_INSTANCE. */
2850 bool
2851 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2852 gimple_stmt_iterator *gsi, int vf,
2853 slp_instance slp_node_instance, bool analyze_only)
2855 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2856 tree mask_element_type = NULL_TREE, mask_type;
2857 int i, j, k, nunits, vec_index = 0, scalar_index;
2858 slp_tree node;
2859 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2860 gimple next_scalar_stmt;
2861 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2862 int first_mask_element;
2863 int index, unroll_factor, current_mask_element, ncopies;
2864 unsigned char *mask;
2865 bool only_one_vec = false, need_next_vector = false;
2866 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2867 int number_of_mask_fixes = 1;
2868 bool mask_fixed = false;
2869 bool needs_first_vector = false;
2870 enum machine_mode mode;
2872 mode = TYPE_MODE (vectype);
2874 if (!can_vec_perm_p (mode, false, NULL))
2876 if (dump_enabled_p ())
2878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2879 "no vect permute for ");
2880 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2882 return false;
2885 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2886 same size as the vector element being permuted. */
2887 mask_element_type = lang_hooks.types.type_for_mode
2888 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2889 mask_type = get_vectype_for_scalar_type (mask_element_type);
2890 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2891 mask = XALLOCAVEC (unsigned char, nunits);
2892 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2894 /* The number of vector stmts to generate based only on the
2895 SLP_NODE_INSTANCE unrolling factor. */
2896 orig_vec_stmts_num = group_size *
2897 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2898 if (orig_vec_stmts_num == 1)
2899 only_one_vec = true;
2901 /* The number of copies is determined by the final vectorization factor
2902 relative to the SLP_NODE_INSTANCE unrolling factor. */
2903 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
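/* For the example below: GROUP_SIZE 3, an unrolling factor of 4 and
NUNITS 4 give 3 * 4 / 4 == 3 original vector stmts per node; when VF
equals the unrolling factor, NCOPIES == 1. */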
2905 /* Generate permutation masks for every NODE. Number of masks for each NODE
2906 is equal to GROUP_SIZE.
2907 E.g., we have a group of three nodes with three loads from the same
2908 location in each node, and the vector size is 4. I.e., we have an
2909 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2910 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2911 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2914 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2915 The last mask is illegal since a permute operation takes two input
2916 operands, so mask element values must be smaller than twice the number
2917 of vector elements. Hence, the last mask must be converted into {2,5,5,5}.
2918 For the first two permutations we need the first and the second input
2919 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2920 we need the second and the third vectors: {b1,c1,a2,b2} and
2921 {c2,a3,b3,c3}. */
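/* The conversion drops the first input vector, reducing each element
index by NUNITS: {6,9,9,9} - 4 == {2,5,5,5}, which now selects from the
second and third vectors. */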
2923 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2925 scalar_index = 0;
2926 index = 0;
2927 vect_stmts_counter = 0;
2928 vec_index = 0;
2929 first_vec_index = vec_index++;
2930 if (only_one_vec)
2931 second_vec_index = first_vec_index;
2932 else
2933 second_vec_index = vec_index++;
2935 for (j = 0; j < unroll_factor; j++)
2937 for (k = 0; k < group_size; k++)
2939 first_mask_element = i + j * group_size;
2940 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2941 nunits, only_one_vec, index,
2942 mask, &current_mask_element,
2943 &need_next_vector,
2944 &number_of_mask_fixes, &mask_fixed,
2945 &needs_first_vector))
2946 return false;
2947 mask[index++] = current_mask_element;
2949 if (index == nunits)
2951 tree mask_vec, *mask_elts;
2952 int l;
2954 if (!can_vec_perm_p (mode, false, mask))
2956 if (dump_enabled_p ())
2958 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2959 vect_location,
2960 "unsupported vect permute { ");
2961 for (i = 0; i < nunits; ++i)
2962 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2963 mask[i]);
2964 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2966 return false;
2969 mask_elts = XALLOCAVEC (tree, nunits);
2970 for (l = 0; l < nunits; ++l)
2971 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2972 mask_vec = build_vector (mask_type, mask_elts);
2973 index = 0;
2975 if (!analyze_only)
2977 if (need_next_vector)
2979 first_vec_index = second_vec_index;
2980 second_vec_index = vec_index;
2983 next_scalar_stmt
2984 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2986 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2987 mask_vec, first_vec_index, second_vec_index,
2988 gsi, node, vectype, dr_chain,
2989 ncopies, vect_stmts_counter++);
2996 return true;
3001 /* Vectorize an SLP instance tree in postorder. */
3003 static bool
3004 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3005 unsigned int vectorization_factor)
3007 gimple stmt;
3008 bool grouped_store, is_store;
3009 gimple_stmt_iterator si;
3010 stmt_vec_info stmt_info;
3011 unsigned int vec_stmts_size, nunits, group_size;
3012 tree vectype;
3013 int i;
3014 slp_tree loads_node;
3015 slp_void_p child;
3017 if (!node)
3018 return false;
3020 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3021 vect_schedule_slp_instance ((slp_tree) child, instance,
3022 vectorization_factor);
3024 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3025 stmt_info = vinfo_for_stmt (stmt);
3027 /* VECTYPE is the type of the destination. */
3028 vectype = STMT_VINFO_VECTYPE (stmt_info);
3029 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3030 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3032 /* For each SLP instance calculate the number of vector stmts to be created
3033 for the scalar stmts in each node of the SLP tree. The number of scalar
3034 results produced in one vector iteration is the number of scalar elements
3035 in one scalar iteration (GROUP_SIZE) multiplied by VF; dividing it by the
3036 number of elements in a vector (NUNITS) gives the number of vector stmts. */
3037 vec_stmts_size = (vectorization_factor * group_size) / nunits;
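/* E.g. (illustrative), VF == 4, GROUP_SIZE == 2 and NUNITS == 4 give
(4 * 2) / 4 == 2 vector stmts per node. */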
3039 /* If there is a load permutation, we have to allocate vectorized statements
3040 for all the nodes that participate in that permutation. */
3041 if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3043 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
3045 if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
3047 SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
3048 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3053 if (!SLP_TREE_VEC_STMTS (node).exists ())
3055 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3056 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3059 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_NOTE,vect_location,
3062 "------>vectorizing SLP node starting from: ");
3063 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3066 /* Loads should be inserted before the first load. */
3067 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3068 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3069 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3070 && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3071 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3072 else if (is_pattern_stmt_p (stmt_info))
3073 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3074 else
3075 si = gsi_for_stmt (stmt);
3077 /* Stores should be inserted just before the last store. */
3078 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3079 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3081 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3082 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3083 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3084 si = gsi_for_stmt (last_store);
3087 /* Mark the first element of the reduction chain as a reduction to properly
3088 transform the node. In the analysis phase only the last element of the
3089 chain is marked as a reduction. */
3090 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3091 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3093 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3094 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3097 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3098 return is_store;
3101 /* Replace scalar calls from SLP node NODE by setting their lhs to zero.
3102 For loop vectorization this is done in vectorizable_call, but for SLP
3103 it needs to be deferred until the end of vect_schedule_slp, because
3104 multiple SLP instances may refer to the same scalar stmt. */
3106 static void
3107 vect_remove_slp_scalar_calls (slp_tree node)
3109 gimple stmt, new_stmt;
3110 gimple_stmt_iterator gsi;
3111 int i;
3112 slp_void_p child;
3113 tree lhs;
3114 stmt_vec_info stmt_info;
3116 if (!node)
3117 return;
3119 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3120 vect_remove_slp_scalar_calls ((slp_tree) child);
3122 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3124 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3125 continue;
3126 stmt_info = vinfo_for_stmt (stmt);
3127 if (stmt_info == NULL
3128 || is_pattern_stmt_p (stmt_info)
3129 || !PURE_SLP_STMT (stmt_info))
3130 continue;
3131 lhs = gimple_call_lhs (stmt);
3132 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3133 set_vinfo_for_stmt (new_stmt, stmt_info);
3134 set_vinfo_for_stmt (stmt, NULL);
3135 STMT_VINFO_STMT (stmt_info) = new_stmt;
3136 gsi = gsi_for_stmt (stmt);
3137 gsi_replace (&gsi, new_stmt, false);
3138 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3142 /* Generate vector code for all SLP instances in the loop/basic block. */
3144 bool
3145 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3147 vec<slp_instance> slp_instances;
3148 slp_instance instance;
3149 slp_tree loads_node;
3150 unsigned int i, j, vf;
3151 bool is_store = false;
3153 if (loop_vinfo)
3155 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3156 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3158 else
3160 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3161 vf = 1;
3164 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3166 /* Schedule the tree of INSTANCE. */
3167 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3168 instance, vf);
3170 /* Clear STMT_VINFO_VEC_STMT of all loads. With loads shared
3171 between SLP instances we would otherwise fail to properly initialize
3172 the vectorized SLP stmts and confuse different load permutations. */
3173 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
3174 STMT_VINFO_VEC_STMT
3175 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
3177 if (dump_enabled_p ())
3178 dump_printf_loc (MSG_NOTE, vect_location,
3179 "vectorizing stmts using SLP.");
3182 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3184 slp_tree root = SLP_INSTANCE_TREE (instance);
3185 gimple store;
3186 unsigned int j;
3187 gimple_stmt_iterator gsi;
3189 /* Remove scalar call stmts. Do not do this for basic-block
3190 vectorization as not all uses may be vectorized.
3191 ??? Why should this be necessary? DCE should be able to
3192 remove the stmts itself.
3193 ??? For BB vectorization we can as well remove scalar
3194 stmts starting from the SLP tree root if they have no
3195 uses. */
3196 if (loop_vinfo)
3197 vect_remove_slp_scalar_calls (root);
3199 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3200 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3202 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3203 break;
3205 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3206 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3207 /* Free the attached stmt_vec_info and remove the stmt. */
3208 gsi = gsi_for_stmt (store);
3209 unlink_stmt_vdef (store);
3210 gsi_remove (&gsi, true);
3211 release_defs (store);
3212 free_stmt_vec_info (store);
3216 return is_store;
3220 /* Vectorize the basic block. */
3222 void
3223 vect_slp_transform_bb (basic_block bb)
3225 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3226 gimple_stmt_iterator si;
3228 gcc_assert (bb_vinfo);
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3233 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3235 gimple stmt = gsi_stmt (si);
3236 stmt_vec_info stmt_info;
3238 if (dump_enabled_p ())
3240 dump_printf_loc (MSG_NOTE, vect_location,
3241 "------>SLPing statement: ");
3242 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3245 stmt_info = vinfo_for_stmt (stmt);
3246 gcc_assert (stmt_info);
3248 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3249 if (STMT_SLP_TYPE (stmt_info))
3251 vect_schedule_slp (NULL, bb_vinfo);
3252 break;
3256 if (dump_enabled_p ())
3257 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3259 destroy_bb_vec_info (bb_vinfo);