gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Extract the location of the basic block in the source code.
   Return the basic block location if it can be determined, and
   UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node = XNEW (struct _slp_tree);
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  return node;
}

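/* For illustration (SSA names made up): for the COND_EXPR statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   gimple_num_ops is 4 (the lhs plus three rhs operands), so NOPS is first
   set to 3 and then bumped to 4, because the embedded comparison a_2 < b_3
   contributes two SLP operands; the node gets child slots for the defs of
   a_2, b_3, c_4 and d_5.  */
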
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  Free the def-stmts if FREE_DEF_STMTS is true.
   (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
   succeeds.  In the latter case we don't need the operands info that we used
   to check isomorphism of the stmts, but we still need the def-stmts - they
   are used as scalar stmts in SLP nodes.)  */
static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info,
                      bool free_def_stmts)
{
  int i;
  slp_oprnd_info oprnd_info;

  if (free_def_stmts)
    FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
      VEC_free (gimple, heap, oprnd_info->def_stmts);

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    number_of_oprnds = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
                                      " are in a pattern, and others are not ");
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node);
              else
                /* Not memory operation (we don't call this function for
                   loads).  */
                vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
                                        slp_node);
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Swapping operands of ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

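/* As an illustration of the operand swapping above (SSA names made up):
   for the group

       x_1 = a_2 + inv_3;
       y_4 = inv_5 + b_6;

   where inv_3 and inv_5 are loop invariants, the defs of operand 0 are
   {a_2, inv_5} and those of operand 1 are {inv_3, b_6}, so the definition
   types do not match.  Since PLUS_EXPR is commutative, the second stmt can
   be rewritten as y_4 = b_6 + inv_5, which makes the operand groups
   {a_2, b_6} and {inv_3, inv_5} isomorphic.  */
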
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: condition is not comparison ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                              "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Not strided loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: different"
                                          " operation");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info))
            {
              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *loads_permuted = true;
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we set
             LOADS_PERMUTED for further check in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   inside_cost, outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted))
        {
          free (child);
          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info, false);
  return true;
}

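/* For example, given the store group

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];

   vect_build_slp_tree builds a root node holding the two stores and one
   child node per operand of the PLUS_EXPR: a node for the loads
   {b[0], b[1]} and a node for the loads {c[0], c[1]}, where the recursion
   stops.  */
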
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree ((slp_tree) child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                  "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

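/* E.g., with GROUP_SIZE 2 and PERMUTATION {1, 0}, vect_slp_rearrange_stmts
   swaps the two scalar stmts of every node in the tree: the stmt at index
   I is moved to index PERMUTATION[I] in the new SLP_TREE_SCALAR_STMTS
   vector.  */
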
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "unsupported unaligned load ");
                          print_gimple_stmt (vect_dump, first_load, 0,
                                             TDF_SLIM);
                        }

                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

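/* E.g., for GROUP_SIZE 2 the generic check above accepts the load
   permutation {0, 0, 1, 1}: both scalar stmts of the first load node read
   element 0 of the interleaving chain and both stmts of the second node
   read element 1, so all the stmts of a node load from the same
   location.  */
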
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                            " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            VEC_safe_push (gimple, heap, scalar_stmts,
                           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                                " block SLP");
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                                      "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

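/* E.g., for a group of 8 strided stores of 32-bit elements and 128-bit
   vectors (NUNITS 4), the unrolling factor is lcm (4, 8) / 8 = 1 and
   NCOPIES_FOR_COST is 1 * 8 / 4 = 2: two vector stores are needed to
   cover the whole group.  */
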
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}

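/* E.g., if an SSA name defined by a pure-SLP stmt is also used by a
   relevant stmt that does not belong to any SLP instance (say, it feeds a
   computation handled by loop-based vectorization), the defining stmt is
   marked hybrid: it must be vectorized both as part of the SLP instance
   and by loop-based vectorization.  */
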
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
      return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

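/* E.g., on a target where a scalar store and a vector store both have
   cost 1 (the behavior of the default cost hook), a group of four scalar
   stores replaced by a single vector store compares a scalar cost of 4
   against a vector inside cost of 1 plus the instance's outside cost, so
   vectorization is normally considered profitable.  */
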
/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

2044 bb_vec_info
2045 vect_slp_analyze_bb (basic_block bb)
2047 bb_vec_info bb_vinfo;
2048 int insns = 0;
2049 gimple_stmt_iterator gsi;
2050 unsigned int vector_sizes;
2052 if (vect_print_dump_info (REPORT_DETAILS))
2053 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
2055 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2057 gimple stmt = gsi_stmt (gsi);
2058 if (!is_gimple_debug (stmt)
2059 && !gimple_nop_p (stmt)
2060 && gimple_code (stmt) != GIMPLE_LABEL)
2061 insns++;
2064 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2067 fprintf (vect_dump, "not vectorized: too many instructions in basic "
2068 "block.\n");
2070 return NULL;
2073 /* Autodetect the first vector size we try. */
2074 current_vector_size = 0;
2075 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2077 while (1)
2079 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2080 if (bb_vinfo)
2081 return bb_vinfo;
2083 destroy_bb_vec_info (bb_vinfo);
2085 vector_sizes &= ~current_vector_size;
2086 if (vector_sizes == 0
2087 || current_vector_size == 0)
2088 return NULL;
2090 /* Try the largest remaining vector size. */
2091 current_vector_size = 1 << floor_log2 (vector_sizes);
2092 if (vect_print_dump_info (REPORT_DETAILS))
2093 fprintf (vect_dump, "***** Re-trying analysis with "
2094 "vector size %d\n", current_vector_size);
2099 /* SLP costs are calculated according to the SLP instance unrolling factor
2100 (i.e., the number of created vector stmts depends on the unrolling factor).
2101 However, the actual number of vector stmts for every SLP node depends on
2102 VF, which is set later in vect_analyze_operations (). Hence, the SLP costs
2103 need to be updated. In this function we assume that the inside costs
2104 calculated in vect_model_xxx_cost are linear in ncopies. */
2106 void
2107 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2109 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2110 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2111 slp_instance instance;
2113 if (vect_print_dump_info (REPORT_SLP))
2114 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2116 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2117 /* We assume that costs are linear in ncopies. */
2118 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2119 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
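/* Worked example with invented numbers: for vf == 8 and an instance with
   unrolling factor 4, every inside-of-loop cost computed at analysis time
   is multiplied by 8 / 4 == 2, matching the number of vector stmts that
   will actually be created per SLP node.  */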
2123 /* For constant and loop invariant defs of SLP_NODE this function returns
2124 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2125 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2126 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2127 REDUC_INDEX is the index of the reduction operand in the scalar statements,
2128 or -1 if there is no reduction. */
2130 static void
2131 vect_get_constant_vectors (tree op, slp_tree slp_node,
2132 VEC (tree, heap) **vec_oprnds,
2133 unsigned int op_num, unsigned int number_of_vectors,
2134 int reduc_index)
2136 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2137 gimple stmt = VEC_index (gimple, stmts, 0);
2138 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2139 int nunits;
2140 tree vec_cst;
2141 tree t = NULL_TREE;
2142 int j, number_of_places_left_in_vector;
2143 tree vector_type;
2144 tree vop;
2145 int group_size = VEC_length (gimple, stmts);
2146 unsigned int vec_num, i;
2147 int number_of_copies = 1;
2148 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2149 bool constant_p, is_store;
2150 tree neutral_op = NULL;
2151 enum tree_code code = gimple_assign_rhs_code (stmt);
2152 gimple def_stmt;
2153 struct loop *loop;
2155 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2156 && reduc_index != -1)
2158 op_num = reduc_index - 1;
2159 op = gimple_op (stmt, reduc_index);
2160 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2161 we need either neutral operands or the original operands. See
2162 get_initial_def_for_reduction() for details. */
2163 switch (code)
2165 case WIDEN_SUM_EXPR:
2166 case DOT_PROD_EXPR:
2167 case PLUS_EXPR:
2168 case MINUS_EXPR:
2169 case BIT_IOR_EXPR:
2170 case BIT_XOR_EXPR:
2171 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2172 neutral_op = build_real (TREE_TYPE (op), dconst0);
2173 else
2174 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2176 break;
2178 case MULT_EXPR:
2179 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2180 neutral_op = build_real (TREE_TYPE (op), dconst1);
2181 else
2182 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2184 break;
2186 case BIT_AND_EXPR:
2187 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2188 break;
2190 case MAX_EXPR:
2191 case MIN_EXPR:
2192 def_stmt = SSA_NAME_DEF_STMT (op);
2193 loop = (gimple_bb (stmt))->loop_father;
2194 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2195 loop_preheader_edge (loop));
2196 break;
2198 default:
2199 neutral_op = NULL;
2203 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2205 is_store = true;
2206 op = gimple_assign_rhs1 (stmt);
2208 else
2209 is_store = false;
2211 gcc_assert (op);
2213 if (CONSTANT_CLASS_P (op))
2214 constant_p = true;
2215 else
2216 constant_p = false;
2218 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2219 gcc_assert (vector_type);
2220 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2222 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2223 the created vectors. It is greater than 1 if unrolling is performed.
2225 For example, we have two scalar operands, s1 and s2 (e.g., a group of
2226 strided accesses of size two), while NUNITS is four (i.e., four scalars
2227 of this type can be packed in a vector). The output vector will contain
2228 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2229 will be 2).
2231 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2232 containing the operands.
2234 For example, NUNITS is four as before, and the group size is 8
2235 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2236 {s5, s6, s7, s8}. */
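/* A further illustration of the formula below (numbers invented): with
   NUNITS == 4 and GROUP_SIZE == 6, least_common_multiple (4, 6) == 12 and
   NUMBER_OF_COPIES == 12 / 6 == 2, yielding the three vectors
   {s1,s2,s3,s4} {s5,s6,s1,s2} {s3,s4,s5,s6}.  */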
2238 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2240 number_of_places_left_in_vector = nunits;
2241 for (j = 0; j < number_of_copies; j++)
2243 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2245 if (is_store)
2246 op = gimple_assign_rhs1 (stmt);
2247 else if (gimple_assign_rhs_code (stmt) != COND_EXPR)
2248 op = gimple_op (stmt, op_num + 1);
2249 else
2251 if (op_num == 0 || op_num == 1)
2253 tree cond = gimple_assign_rhs1 (stmt);
2254 op = TREE_OPERAND (cond, op_num);
2256 else
2258 if (op_num == 2)
2259 op = gimple_assign_rhs2 (stmt);
2260 else
2261 op = gimple_assign_rhs3 (stmt);
2265 if (reduc_index != -1)
2267 loop = (gimple_bb (stmt))->loop_father;
2268 def_stmt = SSA_NAME_DEF_STMT (op);
2270 gcc_assert (loop);
2272 /* Get the def before the loop. In a reduction chain we have only
2273 one initial value. */
2274 if ((j != (number_of_copies - 1)
2275 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2276 && i != 0))
2277 && neutral_op)
2278 op = neutral_op;
2279 else
2280 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2281 loop_preheader_edge (loop));
2284 /* Create 'vect_ = {op0,op1,...,opn}'. */
2285 t = tree_cons (NULL_TREE, op, t);
2287 number_of_places_left_in_vector--;
2289 if (number_of_places_left_in_vector == 0)
2291 number_of_places_left_in_vector = nunits;
2293 if (constant_p)
2294 vec_cst = build_vector (vector_type, t);
2295 else
2296 vec_cst = build_constructor_from_list (vector_type, t);
2297 VEC_quick_push (tree, voprnds,
2298 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2299 t = NULL_TREE;
2304 /* Since the vectors are created in the reverse order, we should invert
2305 them. */
2306 vec_num = VEC_length (tree, voprnds);
2307 for (j = vec_num - 1; j >= 0; j--)
2309 vop = VEC_index (tree, voprnds, j);
2310 VEC_quick_push (tree, *vec_oprnds, vop);
2313 VEC_free (tree, heap, voprnds);
2315 /* In case VF is greater than the unrolling factor needed for the SLP
2316 group of stmts, the NUMBER_OF_VECTORS to be created is greater than
2317 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2318 to replicate the vectors. */
2319 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2321 tree neutral_vec = NULL;
2323 if (neutral_op)
2325 if (!neutral_vec)
2326 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2328 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2330 else
2332 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2333 VEC_quick_push (tree, *vec_oprnds, vop);
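/* E.g. (invented numbers), if NUMBER_OF_VECTORS == 4 but only two vectors
   were built above, the loop either pushes two neutral vectors (when a
   neutral element such as 0 for PLUS_EXPR is known) or re-pushes the two
   existing vectors in order.  */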
2339 /* Get vectorized definitions from SLP_NODE that contains corresponding
2340 vectorized def-stmts. */
2342 static void
2343 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2345 tree vec_oprnd;
2346 gimple vec_def_stmt;
2347 unsigned int i;
2349 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2351 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2353 gcc_assert (vec_def_stmt);
2354 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2355 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2360 /* Get vectorized definitions for SLP_NODE.
2361 If the scalar definitions are loop invariants or constants, collect them and
2362 call vect_get_constant_vectors() to create vector stmts.
2363 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
2364 must be stored in the corresponding child of SLP_NODE, and we call
2365 vect_get_slp_vect_defs () to retrieve them. */
2367 void
2368 vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2369 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
2371 gimple first_stmt, first_def;
2372 int number_of_vects = 0, i;
2373 unsigned int child_index = 0;
2374 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2375 slp_tree child = NULL;
2376 VEC (tree, heap) *vec_defs;
2377 tree oprnd, def_lhs;
2378 bool vectorized_defs;
2380 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2381 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
2383 /* For each operand we check if it has vectorized definitions in a child
2384 node or we need to create them (for invariants and constants). We
2385 check if the LHS of the first stmt of the next child matches OPRND.
2386 If it does, we found the correct child. Otherwise, we call
2387 vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
2388 this child node is checked again for the next operand. */
2389 vectorized_defs = false;
2390 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
2392 child = (slp_tree) VEC_index (slp_void_p,
2393 SLP_TREE_CHILDREN (slp_node),
2394 child_index);
2395 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2397 /* At the end of a pattern sequence we have a use of the original stmt,
2398 so we need to compare OPRND with the original def. */
2399 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2400 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2401 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2402 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2404 if (is_gimple_call (first_def))
2405 def_lhs = gimple_call_lhs (first_def);
2406 else
2407 def_lhs = gimple_assign_lhs (first_def);
2409 if (operand_equal_p (oprnd, def_lhs, 0))
2411 /* The number of vector defs is determined by the number of
2412 vector statements in the node from which we get those
2413 statements. */
2414 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2415 vectorized_defs = true;
2416 child_index++;
2420 if (!vectorized_defs)
2422 if (i == 0)
2424 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2425 /* Number of vector stmts was calculated according to LHS in
2426 vect_schedule_slp_instance (), fix it by replacing LHS with
2427 RHS, if necessary. See vect_get_smallest_scalar_type () for
2428 details. */
2429 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2430 &rhs_size_unit);
2431 if (rhs_size_unit != lhs_size_unit)
2433 number_of_vects *= rhs_size_unit;
2434 number_of_vects /= lhs_size_unit;
2439 /* Allocate memory for vectorized defs. */
2440 vec_defs = VEC_alloc (tree, heap, number_of_vects);
2442 /* For reduction defs we call vect_get_constant_vectors (), since we are
2443 looking for initial loop invariant values. */
2444 if (vectorized_defs && reduc_index == -1)
2445 /* The defs are already vectorized. */
2446 vect_get_slp_vect_defs (child, &vec_defs);
2447 else
2448 /* Build vectors from scalar defs. */
2449 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2450 number_of_vects, reduc_index);
2452 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
2454 /* For reductions, we only need initial values. */
2455 if (reduc_index != -1)
2456 return;
2461 /* Create NCOPIES permutation statements using the mask MASK (by
2462 building a vector of type MASK_TYPE from it) and two input vectors placed
2463 in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy,
2464 shifting by STRIDE elements of DR_CHAIN for every subsequent copy
2465 (STRIDE is the number of vectorized stmts for NODE divided by the number
2466 of copies).
2467 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2468 at which the created stmts must be inserted. */
2470 static inline void
2471 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2472 tree mask, int first_vec_indx, int second_vec_indx,
2473 gimple_stmt_iterator *gsi, slp_tree node,
2474 tree vectype, VEC(tree,heap) *dr_chain,
2475 int ncopies, int vect_stmts_counter)
2477 tree perm_dest;
2478 gimple perm_stmt = NULL;
2479 stmt_vec_info next_stmt_info;
2480 int i, stride;
2481 tree first_vec, second_vec, data_ref;
2483 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2485 /* Initialize the vect stmts of NODE to properly insert the generated
2486 stmts later. */
2487 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2488 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2489 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2491 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2492 for (i = 0; i < ncopies; i++)
2494 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2495 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2497 /* Generate the permute statement. */
2498 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2499 first_vec, second_vec, mask);
2500 data_ref = make_ssa_name (perm_dest, perm_stmt);
2501 gimple_set_lhs (perm_stmt, data_ref);
2502 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2504 /* Store the vector statement in NODE. */
2505 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2506 stride * i + vect_stmts_counter, perm_stmt);
2508 first_vec_indx += stride;
2509 second_vec_indx += stride;
2512 /* Mark the scalar stmt as vectorized. */
2513 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2514 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2518 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
2519 return in CURRENT_MASK_ELEMENT its equivalent in target-specific
2520 representation. Check that the mask is valid and return FALSE if not.
2521 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2522 the next vector, i.e., the current first vector is not needed. */
2524 static bool
2525 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2526 int mask_nunits, bool only_one_vec, int index,
2527 unsigned char *mask, int *current_mask_element,
2528 bool *need_next_vector, int *number_of_mask_fixes,
2529 bool *mask_fixed, bool *needs_first_vector)
2531 int i;
2533 /* Convert to target specific representation. */
2534 *current_mask_element = first_mask_element + m;
2535 /* Adjust the value in case it's a mask for second and third vectors. */
2536 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2538 if (*current_mask_element < mask_nunits)
2539 *needs_first_vector = true;
2541 /* We have only one input vector to permute but the mask accesses values in
2542 the next vector as well. */
2543 if (only_one_vec && *current_mask_element >= mask_nunits)
2545 if (vect_print_dump_info (REPORT_DETAILS))
2547 fprintf (vect_dump, "permutation requires at least two vectors ");
2548 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2551 return false;
2554 /* The mask requires the next vector. */
2555 if (*current_mask_element >= mask_nunits * 2)
2557 if (*needs_first_vector || *mask_fixed)
2559 /* We either need the first vector too or have already moved to the
2560 next vector. In both cases, this permutation needs three
2561 vectors. */
2562 if (vect_print_dump_info (REPORT_DETAILS))
2564 fprintf (vect_dump, "permutation requires at "
2565 "least three vectors ");
2566 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2569 return false;
2572 /* We move to the next vector, dropping the first one and working with
2573 the second and the third - we need to adjust the values of the mask
2574 accordingly. */
2575 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2577 for (i = 0; i < index; i++)
2578 mask[i] -= mask_nunits * *number_of_mask_fixes;
2580 (*number_of_mask_fixes)++;
2581 *mask_fixed = true;
2584 *need_next_vector = *mask_fixed;
2586 /* This was the last element of this mask. Start a new one. */
2587 if (index == mask_nunits - 1)
2589 *number_of_mask_fixes = 1;
2590 *mask_fixed = false;
2591 *needs_first_vector = false;
2594 return true;
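/* Illustrative walk-through with invented values, mask_nunits == 4:
   first_mask_element == 6 and m == 0 give element 6, which selects from
   the second input vector (4 <= 6 < 8) and is valid. Element 9 (>= 2 * 4)
   asks for a third vector: if the first vector is still needed the
   permutation fails, otherwise every mask value is shifted down by 4 and
   *mask_fixed is set so that the caller advances to the next vector.  */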
2598 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2599 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2600 permute statements for SLP_NODE_INSTANCE. */
2601 bool
2602 vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2603 gimple_stmt_iterator *gsi, int vf,
2604 slp_instance slp_node_instance, bool analyze_only)
2606 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2607 tree mask_element_type = NULL_TREE, mask_type;
2608 int i, j, k, nunits, vec_index = 0, scalar_index;
2609 slp_tree node;
2610 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2611 gimple next_scalar_stmt;
2612 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2613 int first_mask_element;
2614 int index, unroll_factor, current_mask_element, ncopies;
2615 unsigned char *mask;
2616 bool only_one_vec = false, need_next_vector = false;
2617 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2618 int number_of_mask_fixes = 1;
2619 bool mask_fixed = false;
2620 bool needs_first_vector = false;
2621 enum machine_mode mode;
2623 mode = TYPE_MODE (vectype);
2625 if (!can_vec_perm_p (mode, false, NULL))
2627 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "no vect permute for ");
2630 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2632 return false;
2635 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2636 same size as the vector element being permuted. */
2637 mask_element_type
2638 = lang_hooks.types.type_for_size
2639 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
2640 mask_type = get_vectype_for_scalar_type (mask_element_type);
2641 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2642 mask = XALLOCAVEC (unsigned char, nunits);
2643 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2645 /* The number of vector stmts to generate based only on the
2646 SLP_NODE_INSTANCE unrolling factor. */
2647 orig_vec_stmts_num = group_size *
2648 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2649 if (orig_vec_stmts_num == 1)
2650 only_one_vec = true;
2652 /* The number of copies is determined by the final vectorization factor
2653 relative to the SLP_NODE_INSTANCE unrolling factor. */
2654 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
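/* With invented numbers: group_size == 3, an instance unrolling factor
   of 4 and nunits == 4 give orig_vec_stmts_num == 3 * 4 / 4 == 3, so more
   than one input vector exists; with vf == 8, ncopies == 8 / 4 == 2.  */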
2656 /* Generate permutation masks for every NODE. The number of masks for each
2657 NODE is equal to GROUP_SIZE.
2658 E.g., we have a group of three nodes with three loads from the same
2659 location in each node, and the vector size is 4. I.e., we have an
2660 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2661 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2662 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2665 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2666 The last mask is illegal since we assume two operands for the permute
2667 operation, and the mask element values can't be outside that range.
2668 Hence, the last mask must be converted into {2,5,5,5}.
2669 For the first two permutations we need the first and the second input
2670 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2671 we need the second and the third vectors: {b1,c1,a2,b2} and
2672 {c2,a3,b3,c3}. */
2674 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2676 scalar_index = 0;
2677 index = 0;
2678 vect_stmts_counter = 0;
2679 vec_index = 0;
2680 first_vec_index = vec_index++;
2681 if (only_one_vec)
2682 second_vec_index = first_vec_index;
2683 else
2684 second_vec_index = vec_index++;
2686 for (j = 0; j < unroll_factor; j++)
2688 for (k = 0; k < group_size; k++)
2690 first_mask_element = i + j * group_size;
2691 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2692 nunits, only_one_vec, index,
2693 mask, &current_mask_element,
2694 &need_next_vector,
2695 &number_of_mask_fixes, &mask_fixed,
2696 &needs_first_vector))
2697 return false;
2698 mask[index++] = current_mask_element;
2700 if (index == nunits)
2702 tree mask_vec = NULL;
2704 if (!can_vec_perm_p (mode, false, mask))
2706 if (vect_print_dump_info (REPORT_DETAILS))
2708 fprintf (vect_dump, "unsupported vect permute { ");
2709 for (i = 0; i < nunits; ++i)
2710 fprintf (vect_dump, "%d ", mask[i]);
2711 fprintf (vect_dump, "}\n");
2713 return false;
2716 while (--index >= 0)
2718 tree t = build_int_cst (mask_element_type, mask[index]);
2719 mask_vec = tree_cons (NULL, t, mask_vec);
2721 mask_vec = build_vector (mask_type, mask_vec);
2722 index = 0;
2724 if (!analyze_only)
2726 if (need_next_vector)
2728 first_vec_index = second_vec_index;
2729 second_vec_index = vec_index;
2732 next_scalar_stmt = VEC_index (gimple,
2733 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2735 vect_create_mask_and_perm (stmt, next_scalar_stmt,
2736 mask_vec, first_vec_index, second_vec_index,
2737 gsi, node, vectype, dr_chain,
2738 ncopies, vect_stmts_counter++);
2745 return true;
2750 /* Vectorize SLP instance tree in postorder. */
2752 static bool
2753 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2754 unsigned int vectorization_factor)
2756 gimple stmt;
2757 bool strided_store, is_store;
2758 gimple_stmt_iterator si;
2759 stmt_vec_info stmt_info;
2760 unsigned int vec_stmts_size, nunits, group_size;
2761 tree vectype;
2762 int i;
2763 slp_tree loads_node;
2764 slp_void_p child;
2766 if (!node)
2767 return false;
2769 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2770 vect_schedule_slp_instance ((slp_tree) child, instance,
2771 vectorization_factor);
2773 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2774 stmt_info = vinfo_for_stmt (stmt);
2776 /* VECTYPE is the type of the destination. */
2777 vectype = STMT_VINFO_VECTYPE (stmt_info);
2778 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2779 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2781 /* For each SLP instance calculate the number of vector stmts to be created
2782 for the scalar stmts in each node of the SLP tree. The number of vector
2783 elements in one vector iteration is the number of scalar elements in
2784 one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
2785 size. */
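/* For example (invented numbers): VF == 4, GROUP_SIZE == 2 and
   NUNITS == 4 give vec_stmts_size == (4 * 2) / 4 == 2 vector stmts
   per node.  */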
2786 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2788 /* In case of load permutation we have to allocate vectorized statements for
2789 all the nodes that participate in that permutation. */
2790 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2792 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2794 if (!SLP_TREE_VEC_STMTS (loads_node))
2796 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2797 vec_stmts_size);
2798 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2803 if (!SLP_TREE_VEC_STMTS (node))
2805 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2806 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2809 if (vect_print_dump_info (REPORT_DETAILS))
2811 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2812 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2815 /* Loads should be inserted before the first load. */
2816 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2817 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2818 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2819 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
2820 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2821 else if (is_pattern_stmt_p (stmt_info))
2822 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2823 else
2824 si = gsi_for_stmt (stmt);
2826 /* Stores should be inserted just before the last store. */
2827 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2828 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2830 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2831 si = gsi_for_stmt (last_store);
2834 /* Mark the first element of the reduction chain as a reduction to properly
2835 transform the node. In the analysis phase only the last element of the
2836 chain is marked as a reduction. */
2837 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2838 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2840 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2841 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2844 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2845 return is_store;
2849 /* Generate vector code for all SLP instances in the loop/basic block. */
2851 bool
2852 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2854 VEC (slp_instance, heap) *slp_instances;
2855 slp_instance instance;
2856 unsigned int i, vf;
2857 bool is_store = false;
2859 if (loop_vinfo)
2861 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2862 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2864 else
2866 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2867 vf = 1;
2870 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2872 /* Schedule the tree of INSTANCE. */
2873 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2874 instance, vf);
2875 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2876 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2877 fprintf (vect_dump, "vectorizing stmts using SLP.");
2880 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2882 slp_tree root = SLP_INSTANCE_TREE (instance);
2883 gimple store;
2884 unsigned int j;
2885 gimple_stmt_iterator gsi;
2887 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2888 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2890 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2891 break;
2893 /* Free the attached stmt_vec_info and remove the stmt. */
2894 gsi = gsi_for_stmt (store);
2895 gsi_remove (&gsi, true);
2896 free_stmt_vec_info (store);
2900 return is_store;
2904 /* Vectorize the basic block. */
2906 void
2907 vect_slp_transform_bb (basic_block bb)
2909 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2910 gimple_stmt_iterator si;
2912 gcc_assert (bb_vinfo);
2914 if (vect_print_dump_info (REPORT_DETAILS))
2915 fprintf (vect_dump, "SLPing BB\n");
2917 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2919 gimple stmt = gsi_stmt (si);
2920 stmt_vec_info stmt_info;
2922 if (vect_print_dump_info (REPORT_DETAILS))
2924 fprintf (vect_dump, "------>SLPing statement: ");
2925 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2928 stmt_info = vinfo_for_stmt (stmt);
2929 gcc_assert (stmt_info);
2931 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2932 if (STMT_SLP_TYPE (stmt_info))
2934 vect_schedule_slp (NULL, bb_vinfo);
2935 break;
2939 mark_sym_for_renaming (gimple_vop (cfun));
2940 /* The memory tags and pointers in vectorized statements need to
2941 have their SSA forms updated. FIXME, why can't this be delayed
2942 until all the loops have been transformed? */
2943 update_ssa (TODO_update_ssa);
2945 if (vect_print_dump_info (REPORT_DETAILS))
2946 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2948 destroy_bb_vec_info (bb_vinfo);