Fix gnu11 fallout on SPARC
[official-gcc.git] / gcc / tree-vect-slp.c
blob81017fbfd8ca864e6a6cd161b2e752f253b696b8
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "gimple-expr.h"
35 #include "is-a.h"
36 #include "gimple.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "tree-pass.h"
44 #include "cfgloop.h"
45 #include "expr.h"
46 #include "recog.h" /* FIXME: for insn_data */
47 #include "optabs.h"
48 #include "tree-vectorizer.h"
49 #include "langhooks.h"
51 /* Extract the location of the basic block in the source code.
52 Return the basic block location if succeed and NULL if not. */
54 source_location
55 find_bb_location (basic_block bb)
57 gimple stmt = NULL;
58 gimple_stmt_iterator si;
60 if (!bb)
61 return UNKNOWN_LOCATION;
63 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
65 stmt = gsi_stmt (si);
66 if (gimple_location (stmt) != UNKNOWN_LOCATION)
67 return gimple_location (stmt);
70 return UNKNOWN_LOCATION;
74 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
76 static void
77 vect_free_slp_tree (slp_tree node)
79 int i;
80 slp_tree child;
82 if (!node)
83 return;
85 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
86 vect_free_slp_tree (child);
88 SLP_TREE_CHILDREN (node).release ();
89 SLP_TREE_SCALAR_STMTS (node).release ();
90 SLP_TREE_VEC_STMTS (node).release ();
91 SLP_TREE_LOAD_PERMUTATION (node).release ();
93 free (node);
97 /* Free the memory allocated for the SLP instance. */
99 void
100 vect_free_slp_instance (slp_instance instance)
102 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
103 SLP_INSTANCE_LOADS (instance).release ();
104 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
105 free (instance);
109 /* Create an SLP node for SCALAR_STMTS. */
111 static slp_tree
112 vect_create_new_slp_node (vec<gimple> scalar_stmts)
114 slp_tree node;
115 gimple stmt = scalar_stmts[0];
116 unsigned int nops;
118 if (is_gimple_call (stmt))
119 nops = gimple_call_num_args (stmt);
120 else if (is_gimple_assign (stmt))
122 nops = gimple_num_ops (stmt) - 1;
123 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
124 nops++;
126 else
127 return NULL;
129 node = XNEW (struct _slp_tree);
130 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
131 SLP_TREE_VEC_STMTS (node).create (0);
132 SLP_TREE_CHILDREN (node).create (nops);
133 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
135 return node;
139 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
140 operand. */
141 static vec<slp_oprnd_info>
142 vect_create_oprnd_info (int nops, int group_size)
144 int i;
145 slp_oprnd_info oprnd_info;
146 vec<slp_oprnd_info> oprnds_info;
148 oprnds_info.create (nops);
149 for (i = 0; i < nops; i++)
151 oprnd_info = XNEW (struct _slp_oprnd_info);
152 oprnd_info->def_stmts.create (group_size);
153 oprnd_info->first_dt = vect_uninitialized_def;
154 oprnd_info->first_op_type = NULL_TREE;
155 oprnd_info->first_pattern = false;
156 oprnds_info.quick_push (oprnd_info);
159 return oprnds_info;
163 /* Free operands info. */
165 static void
166 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
168 int i;
169 slp_oprnd_info oprnd_info;
171 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
173 oprnd_info->def_stmts.release ();
174 XDELETE (oprnd_info);
177 oprnds_info.release ();
181 /* Find the place of the data-ref in STMT in the interleaving chain that starts
182 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
184 static int
185 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
187 gimple next_stmt = first_stmt;
188 int result = 0;
190 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
191 return -1;
195 if (next_stmt == stmt)
196 return result;
197 result++;
198 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
200 while (next_stmt);
202 return -1;
206 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
207 they are of a valid type and that they match the defs of the first stmt of
208 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
209 return -1, if the error could be corrected by swapping operands of the
210 operation return 1, if everything is ok return 0. */
212 static int
213 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
214 gimple stmt, bool first,
215 vec<slp_oprnd_info> *oprnds_info)
217 tree oprnd;
218 unsigned int i, number_of_oprnds;
219 tree def;
220 gimple def_stmt;
221 enum vect_def_type dt = vect_uninitialized_def;
222 struct loop *loop = NULL;
223 bool pattern = false;
224 slp_oprnd_info oprnd_info;
225 int first_op_idx = 1;
226 bool commutative = false;
227 bool first_op_cond = false;
229 if (loop_vinfo)
230 loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Determine the number of rhs operands and the gimple_op index of the
   first one.  For a call the arguments start at op index 3; for a
   COND_EXPR the embedded comparison contributes one extra operand and
   the comparison operands are treated as commutative.  */
232 if (is_gimple_call (stmt))
234 number_of_oprnds = gimple_call_num_args (stmt);
235 first_op_idx = 3;
237 else if (is_gimple_assign (stmt))
239 enum tree_code code = gimple_assign_rhs_code (stmt);
240 number_of_oprnds = gimple_num_ops (stmt) - 1;
241 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
243 first_op_cond = true;
244 commutative = true;
245 number_of_oprnds++;
247 else
248 commutative = commutative_tree_code (code);
250 else
251 return -1;
/* Walk all operands.  On a recoverable mismatch at operand 0 of a
   commutative operation we set SWAPPED and jump back to "again" to
   re-check with the two operands exchanged; the actual swap of the
   statement is performed once at the end.  */
253 bool swapped = false;
254 for (i = 0; i < number_of_oprnds; i++)
256 again:
257 if (first_op_cond)
259 if (i == 0 || i == 1)
260 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
261 swapped ? !i : i);
262 else
263 oprnd = gimple_op (stmt, first_op_idx + i - 1);
265 else
266 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
268 oprnd_info = (*oprnds_info)[i];
270 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
271 &def, &dt)
272 || (!def_stmt && dt != vect_constant_def))
274 if (dump_enabled_p ())
276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
277 "Build SLP failed: can't find def for ");
278 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
279 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
282 return -1;
285 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
286 from the pattern. Check that all the stmts of the node are in the
287 pattern. */
288 if (def_stmt && gimple_bb (def_stmt)
289 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
290 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
291 && gimple_code (def_stmt) != GIMPLE_PHI))
292 && vinfo_for_stmt (def_stmt)
293 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
294 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
295 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
297 pattern = true;
298 if (!first && !oprnd_info->first_pattern)
300 if (i == 0
301 && !swapped
302 && commutative)
304 swapped = true;
305 goto again;
308 if (dump_enabled_p ())
310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
311 "Build SLP failed: some of the stmts"
312 " are in a pattern, and others are not ");
313 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
314 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
317 return 1;
320 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
321 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
323 if (dt == vect_unknown_def_type)
325 if (dump_enabled_p ())
326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
327 "Unsupported pattern.\n");
328 return -1;
331 switch (gimple_code (def_stmt))
333 case GIMPLE_PHI:
334 def = gimple_phi_result (def_stmt);
335 break;
337 case GIMPLE_ASSIGN:
338 def = gimple_assign_lhs (def_stmt);
339 break;
341 default:
342 if (dump_enabled_p ())
343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
344 "unsupported defining stmt:\n");
345 return -1;
/* For the first stmt of the group just record what we saw; for every
   later stmt the def type and operand type must match that record.  */
349 if (first)
351 oprnd_info->first_dt = dt;
352 oprnd_info->first_pattern = pattern;
353 oprnd_info->first_op_type = TREE_TYPE (oprnd);
355 else
357 /* Not first stmt of the group, check that the def-stmt/s match
358 the def-stmt/s of the first stmt. Allow different definition
359 types for reduction chains: the first stmt must be a
360 vect_reduction_def (a phi node), and the rest
361 vect_internal_def. */
362 if (((oprnd_info->first_dt != dt
363 && !(oprnd_info->first_dt == vect_reduction_def
364 && dt == vect_internal_def)
365 && !((oprnd_info->first_dt == vect_external_def
366 || oprnd_info->first_dt == vect_constant_def)
367 && (dt == vect_external_def
368 || dt == vect_constant_def)))
369 || !types_compatible_p (oprnd_info->first_op_type,
370 TREE_TYPE (oprnd))))
372 /* Try swapping operands if we got a mismatch. */
373 if (i == 0
374 && !swapped
375 && commutative)
377 swapped = true;
378 goto again;
381 if (dump_enabled_p ())
382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
383 "Build SLP failed: different types\n");
385 return 1;
389 /* Check the types of the definitions. */
390 switch (dt)
392 case vect_constant_def:
393 case vect_external_def:
394 case vect_reduction_def:
395 break;
397 case vect_internal_def:
398 oprnd_info->def_stmts.quick_push (def_stmt);
399 break;
401 default:
402 /* FORNOW: Not supported. */
403 if (dump_enabled_p ())
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
406 "Build SLP failed: illegal type of def ");
407 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
408 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
411 return -1;
415 /* Swap operands. */
416 if (swapped)
418 if (first_op_cond)
420 tree cond = gimple_assign_rhs1 (stmt);
421 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
422 &TREE_OPERAND (cond, 1));
423 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
425 else
426 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
427 gimple_assign_rhs2_ptr (stmt));
430 return 0;
434 /* Verify if the scalar stmts STMTS are isomorphic, require data
435 permutation or are of unsupported types of operation. Return
436 true if they are, otherwise return false and indicate in *MATCHES
437 which stmts are not isomorphic to the first one. If MATCHES[0]
438 is false then this indicates the comparison could not be
439 carried out or the stmts will never be vectorized by SLP. */
441 static bool
442 vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
443 vec<gimple> stmts, unsigned int group_size,
444 unsigned nops, unsigned int *max_nunits,
445 unsigned int vectorization_factor, bool *matches)
447 unsigned int i;
448 gimple stmt = stmts[0];
449 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
450 enum tree_code first_cond_code = ERROR_MARK;
451 tree lhs;
452 bool need_same_oprnds = false;
453 tree vectype, scalar_type, first_op1 = NULL_TREE;
454 optab optab;
455 int icode;
456 enum machine_mode optab_op2_mode;
457 enum machine_mode vec_mode;
458 struct data_reference *first_dr;
459 HOST_WIDE_INT dummy;
460 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
461 tree cond;
463 /* For every stmt in NODE find its def stmt/s. */
464 FOR_EACH_VEC_ELT (stmts, i, stmt)
466 matches[i] = false;
468 if (dump_enabled_p ())
470 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
471 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
472 dump_printf (MSG_NOTE, "\n");
475 /* Fail to vectorize statements marked as unvectorizable. */
476 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
478 if (dump_enabled_p ())
480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
481 "Build SLP failed: unvectorizable statement ");
482 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
483 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
485 /* Fatal mismatch. */
486 matches[0] = false;
487 return false;
490 lhs = gimple_get_lhs (stmt);
491 if (lhs == NULL_TREE)
493 if (dump_enabled_p ())
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
496 "Build SLP failed: not GIMPLE_ASSIGN nor "
497 "GIMPLE_CALL ");
498 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
499 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
501 /* Fatal mismatch. */
502 matches[0] = false;
503 return false;
/* A COND_EXPR's condition must be a comparison-class tree here;
   anything else is rejected outright.  */
506 if (is_gimple_assign (stmt)
507 && gimple_assign_rhs_code (stmt) == COND_EXPR
508 && (cond = gimple_assign_rhs1 (stmt))
509 && !COMPARISON_CLASS_P (cond))
511 if (dump_enabled_p ())
513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
514 "Build SLP failed: condition is not "
515 "comparison ");
516 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
517 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
519 /* Fatal mismatch. */
520 matches[0] = false;
521 return false;
524 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
525 vectype = get_vectype_for_scalar_type (scalar_type);
526 if (!vectype)
528 if (dump_enabled_p ())
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: unsupported data-type ");
532 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
533 scalar_type);
534 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
536 /* Fatal mismatch. */
537 matches[0] = false;
538 return false;
541 /* In case of multiple types we need to detect the smallest type. */
542 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
544 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
545 if (bb_vinfo)
546 vectorization_factor = *max_nunits;
/* Only plain calls are supported: no internal fns, tail calls,
   noreturn calls, throwing calls, or calls with a static chain.  */
549 if (is_gimple_call (stmt))
551 rhs_code = CALL_EXPR;
552 if (gimple_call_internal_p (stmt)
553 || gimple_call_tail_p (stmt)
554 || gimple_call_noreturn_p (stmt)
555 || !gimple_call_nothrow_p (stmt)
556 || gimple_call_chain (stmt))
558 if (dump_enabled_p ())
560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
561 "Build SLP failed: unsupported call type ");
562 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
563 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
565 /* Fatal mismatch. */
566 matches[0] = false;
567 return false;
570 else
571 rhs_code = gimple_assign_rhs_code (stmt);
573 /* Check the operation. */
574 if (i == 0)
576 first_stmt_code = rhs_code;
578 /* Shift arguments should be equal in all the packed stmts for a
579 vector shift with scalar shift operand. */
580 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
581 || rhs_code == LROTATE_EXPR
582 || rhs_code == RROTATE_EXPR)
584 vec_mode = TYPE_MODE (vectype);
586 /* First see if we have a vector/vector shift. */
587 optab = optab_for_tree_code (rhs_code, vectype,
588 optab_vector);
590 if (!optab
591 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
593 /* No vector/vector shift, try for a vector/scalar shift. */
594 optab = optab_for_tree_code (rhs_code, vectype,
595 optab_scalar);
597 if (!optab)
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
601 "Build SLP failed: no optab.\n");
602 /* Fatal mismatch. */
603 matches[0] = false;
604 return false;
606 icode = (int) optab_handler (optab, vec_mode);
607 if (icode == CODE_FOR_nothing)
609 if (dump_enabled_p ())
610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
611 "Build SLP failed: "
612 "op not supported by target.\n");
613 /* Fatal mismatch. */
614 matches[0] = false;
615 return false;
617 optab_op2_mode = insn_data[icode].operand[2].mode;
618 if (!VECTOR_MODE_P (optab_op2_mode))
620 need_same_oprnds = true;
621 first_op1 = gimple_assign_rhs2 (stmt);
625 else if (rhs_code == WIDEN_LSHIFT_EXPR)
627 need_same_oprnds = true;
628 first_op1 = gimple_assign_rhs2 (stmt);
631 else
633 if (first_stmt_code != rhs_code
634 && (first_stmt_code != IMAGPART_EXPR
635 || rhs_code != REALPART_EXPR)
636 && (first_stmt_code != REALPART_EXPR
637 || rhs_code != IMAGPART_EXPR)
638 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
639 && (first_stmt_code == ARRAY_REF
640 || first_stmt_code == BIT_FIELD_REF
641 || first_stmt_code == INDIRECT_REF
642 || first_stmt_code == COMPONENT_REF
643 || first_stmt_code == MEM_REF)))
645 if (dump_enabled_p ())
647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
648 "Build SLP failed: different operation "
649 "in stmt ");
650 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
651 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
653 /* Mismatch. */
654 continue;
657 if (need_same_oprnds
658 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
663 "Build SLP failed: different shift "
664 "arguments in ");
665 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
666 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
668 /* Mismatch. */
669 continue;
672 if (rhs_code == CALL_EXPR)
674 gimple first_stmt = stmts[0];
675 if (gimple_call_num_args (stmt) != nops
676 || !operand_equal_p (gimple_call_fn (first_stmt),
677 gimple_call_fn (stmt), 0)
678 || gimple_call_fntype (first_stmt)
679 != gimple_call_fntype (stmt))
681 if (dump_enabled_p ())
683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
684 "Build SLP failed: different calls in ");
685 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
686 stmt, 0);
687 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
689 /* Mismatch. */
690 continue;
695 /* Grouped store or load. */
696 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
698 if (REFERENCE_CLASS_P (lhs))
700 /* Store. */
703 else
705 /* Load. */
706 unsigned unrolling_factor
707 = least_common_multiple
708 (*max_nunits, group_size) / group_size;
709 /* FORNOW: Check that there is no gap between the loads
710 and no gap between the groups when we need to load
711 multiple groups at once.
712 ??? We should enhance this to only disallow gaps
713 inside vectors. */
714 if ((unrolling_factor > 1
715 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
716 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
717 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
718 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
720 if (dump_enabled_p ())
722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
723 "Build SLP failed: grouped "
724 "loads have gaps ");
725 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
726 stmt, 0);
727 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
729 /* Fatal mismatch. */
730 matches[0] = false;
731 return false;
734 /* Check that the size of interleaved loads group is not
735 greater than the SLP group size. */
736 unsigned ncopies
737 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
738 if (loop_vinfo
739 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
740 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
741 - GROUP_GAP (vinfo_for_stmt (stmt)))
742 > ncopies * group_size))
744 if (dump_enabled_p ())
746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
747 "Build SLP failed: the number "
748 "of interleaved loads is greater than "
749 "the SLP group size ");
750 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
751 stmt, 0);
752 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
754 /* Fatal mismatch. */
755 matches[0] = false;
756 return false;
759 old_first_load = first_load;
760 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
761 if (prev_first_load)
763 /* Check that there are no loads from different interleaving
764 chains in the same node. */
765 if (prev_first_load != first_load)
767 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
770 vect_location,
771 "Build SLP failed: different "
772 "interleaving chains in one node ");
773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
774 stmt, 0);
775 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
777 /* Mismatch. */
778 continue;
781 else
782 prev_first_load = first_load;
784 /* In some cases a group of loads is just the same load
785 repeated N times. Only analyze its cost once. */
786 if (first_load == stmt && old_first_load != first_load)
788 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
789 if (vect_supportable_dr_alignment (first_dr, false)
790 == dr_unaligned_unsupported)
792 if (dump_enabled_p ())
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
795 vect_location,
796 "Build SLP failed: unsupported "
797 "unaligned load ");
798 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
799 stmt, 0);
800 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
802 /* Fatal mismatch. */
803 matches[0] = false;
804 return false;
808 } /* Grouped access. */
809 else
811 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
813 /* Not grouped load. */
814 if (dump_enabled_p ())
816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
817 "Build SLP failed: not grouped load ");
818 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
819 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
822 /* FORNOW: Not grouped loads are not supported. */
823 /* Fatal mismatch. */
824 matches[0] = false;
825 return false;
828 /* Not memory operation. */
829 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
830 && TREE_CODE_CLASS (rhs_code) != tcc_unary
831 && rhs_code != COND_EXPR
832 && rhs_code != CALL_EXPR)
834 if (dump_enabled_p ())
836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
837 "Build SLP failed: operation");
838 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
839 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
840 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
842 /* Fatal mismatch. */
843 matches[0] = false;
844 return false;
847 if (rhs_code == COND_EXPR)
849 tree cond_expr = gimple_assign_rhs1 (stmt);
851 if (i == 0)
852 first_cond_code = TREE_CODE (cond_expr);
853 else if (first_cond_code != TREE_CODE (cond_expr))
855 if (dump_enabled_p ())
857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
858 "Build SLP failed: different"
859 " operation");
860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
861 stmt, 0);
862 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
864 /* Mismatch. */
865 continue;
870 matches[i] = true;
/* Succeed only if every stmt in the group matched the first one.  */
873 for (i = 0; i < group_size; ++i)
874 if (!matches[i])
875 return false;
877 return true;
880 /* Recursively build an SLP tree starting from NODE.
881 Fail (and return a value not equal to zero) if def-stmts are not
882 isomorphic, require data permutation or are of unsupported types of
883 operation. Otherwise, return 0.
884 The value returned is the depth in the SLP tree where a mismatch
885 was found. */
887 static bool
888 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
889 slp_tree *node, unsigned int group_size,
890 unsigned int *max_nunits,
891 vec<slp_tree> *loads,
892 unsigned int vectorization_factor,
893 bool *matches, unsigned *npermutes, unsigned *tree_size,
894 unsigned max_tree_size)
896 unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
897 gimple stmt;
/* At the toplevel invocation MATCHES and NPERMUTES may be NULL; fall
   back to local scratch storage in that case.  */
899 if (!matches)
900 matches = XALLOCAVEC (bool, group_size);
901 if (!npermutes)
902 npermutes = &this_npermutes;
904 matches[0] = false;
906 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
907 if (is_gimple_call (stmt))
908 nops = gimple_call_num_args (stmt);
909 else if (is_gimple_assign (stmt))
911 nops = gimple_num_ops (stmt) - 1;
912 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
913 nops++;
915 else
916 return false;
918 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
919 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
920 max_nunits, vectorization_factor, matches))
921 return false;
923 /* If the SLP node is a load, terminate the recursion. */
924 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
925 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
927 loads->safe_push (*node);
928 return true;
931 /* Get at the operands, verifying they are compatible. */
932 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
933 slp_oprnd_info oprnd_info;
934 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
936 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
937 stmt, (i == 0), &oprnds_info))
939 case 0:
940 break;
941 case -1:
942 matches[0] = false;
943 vect_free_oprnd_info (oprnds_info);
944 return false;
945 case 1:
946 matches[i] = false;
947 break;
950 for (i = 0; i < group_size; ++i)
951 if (!matches[i])
953 vect_free_oprnd_info (oprnds_info);
954 return false;
957 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
959 /* Create SLP_TREE nodes for the definition node/s. */
960 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
962 slp_tree child;
963 unsigned old_nloads = loads->length ();
964 unsigned old_max_nunits = *max_nunits;
966 if (oprnd_info->first_dt != vect_internal_def)
967 continue;
969 if (++this_tree_size > max_tree_size)
971 vect_free_oprnd_info (oprnds_info);
972 return false;
975 child = vect_create_new_slp_node (oprnd_info->def_stmts);
976 if (!child)
978 vect_free_oprnd_info (oprnds_info);
979 return false;
982 bool *matches = XALLOCAVEC (bool, group_size);
983 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
984 group_size, max_nunits, loads,
985 vectorization_factor, matches,
986 npermutes, &this_tree_size, max_tree_size))
988 oprnd_info->def_stmts = vNULL;
989 SLP_TREE_CHILDREN (*node).quick_push (child);
990 continue;
993 /* If the SLP build for operand zero failed and operand zero
994 and one can be commutated try that for the scalar stmts
995 that failed the match. */
996 if (i == 0
997 /* A first scalar stmt mismatch signals a fatal mismatch. */
998 && matches[0]
999 /* ??? For COND_EXPRs we can swap the comparison operands
1000 as well as the arms under some constraints. */
1001 && nops == 2
1002 && oprnds_info[1]->first_dt == vect_internal_def
1003 && is_gimple_assign (stmt)
1004 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1005 /* Do so only if the number of not successful permutes was nor more
1006 than a cut-ff as re-trying the recursive match on
1007 possibly each level of the tree would expose exponential
1008 behavior. */
1009 && *npermutes < 4)
1011 /* Roll back. */
1012 *max_nunits = old_max_nunits;
1013 loads->truncate (old_nloads);
1014 /* Swap mismatched definition stmts. */
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "Re-trying with swapped operands of stmts ");
1017 for (unsigned j = 0; j < group_size; ++j)
1018 if (!matches[j])
1020 gimple tem = oprnds_info[0]->def_stmts[j];
1021 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1022 oprnds_info[1]->def_stmts[j] = tem;
1023 dump_printf (MSG_NOTE, "%d ", j);
1025 dump_printf (MSG_NOTE, "\n");
1026 /* And try again ... */
1027 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1028 group_size, max_nunits, loads,
1029 vectorization_factor,
1030 matches, npermutes, &this_tree_size,
1031 max_tree_size))
1033 oprnd_info->def_stmts = vNULL;
1034 SLP_TREE_CHILDREN (*node).quick_push (child);
1035 continue;
1038 ++*npermutes;
1041 oprnd_info->def_stmts = vNULL;
1042 vect_free_slp_tree (child);
1043 vect_free_oprnd_info (oprnds_info);
1044 return false;
/* Propagate the size of the subtree we built to the caller.  */
1047 if (tree_size)
1048 *tree_size += this_tree_size;
1050 vect_free_oprnd_info (oprnds_info);
1051 return true;
1054 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1056 static void
1057 vect_print_slp_tree (int dump_kind, slp_tree node)
1059 int i;
1060 gimple stmt;
1061 slp_tree child;
1063 if (!node)
1064 return;
1066 dump_printf (dump_kind, "node ");
1067 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1069 dump_printf (dump_kind, "\n\tstmt %d ", i);
1070 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1072 dump_printf (dump_kind, "\n");
1074 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1075 vect_print_slp_tree (dump_kind, child);
1079 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1080 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1081 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1082 stmts in NODE are to be marked. */
1084 static void
1085 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1087 int i;
1088 gimple stmt;
1089 slp_tree child;
1091 if (!node)
1092 return;
1094 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1095 if (j < 0 || i == j)
1096 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1098 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1099 vect_mark_slp_stmts (child, mark, j);
1103 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1105 static void
1106 vect_mark_slp_stmts_relevant (slp_tree node)
1108 int i;
1109 gimple stmt;
1110 stmt_vec_info stmt_info;
1111 slp_tree child;
1113 if (!node)
1114 return;
1116 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1118 stmt_info = vinfo_for_stmt (stmt);
1119 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1120 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1121 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1124 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1125 vect_mark_slp_stmts_relevant (child);
1129 /* Rearrange the statements of NODE according to PERMUTATION. */
1131 static void
1132 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1133 vec<unsigned> permutation)
1135 gimple stmt;
1136 vec<gimple> tmp_stmts;
1137 unsigned int i;
1138 slp_tree child;
1140 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1141 vect_slp_rearrange_stmts (child, group_size, permutation);
1143 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1144 tmp_stmts.create (group_size);
1145 tmp_stmts.quick_grow_cleared (group_size);
1147 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1148 tmp_stmts[permutation[i]] = stmt;
1150 SLP_TREE_SCALAR_STMTS (node).release ();
1151 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1155 /* Check if the required load permutations in the SLP instance
1156 SLP_INSTN are supported. */
1158 static bool
1159 vect_supported_load_permutation_p (slp_instance slp_instn)
1161 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1162 unsigned int i, j, k, next;
1163 sbitmap load_index;
1164 slp_tree node;
1165 gimple stmt, load, next_load, first_load;
1166 struct data_reference *dr;
/* Dump the load permutation of every load node; a node without a recorded
   permutation is printed as the identity 0..GROUP_SIZE-1.  */
1168 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1171 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1172 if (node->load_permutation.exists ())
1173 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1174 dump_printf (MSG_NOTE, "%d ", next);
1175 else
1176 for (k = 0; k < group_size; ++k)
1177 dump_printf (MSG_NOTE, "%d ", k);
1178 dump_printf (MSG_NOTE, "\n");
1181 /* In case of reduction every load permutation is allowed, since the order
1182 of the reduction statements is not important (as opposed to the case of
1183 grouped stores). The only condition we need to check is that all the
1184 load nodes are of the same size and have the same permutation (and then
1185 rearrange all the nodes of the SLP instance according to this
1186 permutation). */
1188 /* Check that all the load nodes are of the same size. */
1189 /* ??? Can't we assert this? */
1190 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1191 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1192 return false;
1194 node = SLP_INSTANCE_TREE (slp_instn);
1195 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1197 /* Reduction (there are no data-refs in the root).
1198 In reduction chain the order of the loads is important. */
1199 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1200 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1202 slp_tree load;
1203 unsigned int lidx;
1205 /* Compare all the permutation sequences to the first one. We know
1206 that at least one load is permuted. */
1207 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1208 if (!node->load_permutation.exists ())
1209 return false;
1210 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1212 if (!load->load_permutation.exists ())
1213 return false;
1214 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1215 if (lidx != node->load_permutation[j])
1216 return false;
1219 /* Check that the loads in the first sequence are different and there
1220 are no gaps between them. */
1221 load_index = sbitmap_alloc (group_size);
1222 bitmap_clear (load_index);
1223 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1225 if (bitmap_bit_p (load_index, lidx))
1227 sbitmap_free (load_index);
1228 return false;
1230 bitmap_set_bit (load_index, lidx);
/* Every index 0..group_size-1 must have been seen, i.e. no gaps.  */
1232 for (i = 0; i < group_size; i++)
1233 if (!bitmap_bit_p (load_index, i))
1235 sbitmap_free (load_index);
1236 return false;
1238 sbitmap_free (load_index);
1240 /* This permutation is valid for reduction. Since the order of the
1241 statements in the nodes is not important unless they are memory
1242 accesses, we can rearrange the statements in all the nodes
1243 according to the order of the loads. */
1244 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1245 node->load_permutation);
1247 /* We are done, no actual permutations need to be generated. */
1248 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1249 SLP_TREE_LOAD_PERMUTATION (node).release ();
1250 return true;
1253 /* In basic block vectorization we allow any subchain of an interleaving
1254 chain.
1255 FORNOW: not supported in loop SLP because of realignment complications. */
1256 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1258 /* Check that for every node in the instance the loads
1259 form a subchain. */
1260 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1262 next_load = NULL;
1263 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1265 if (j != 0 && next_load != load)
1266 return false;
1267 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1271 /* Check that the alignment of the first load in every subchain, i.e.,
1272 the first statement in every load node, is supported.
1273 ??? This belongs in alignment checking. */
1274 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1276 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1277 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1279 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1280 if (vect_supportable_dr_alignment (dr, false)
1281 == dr_unaligned_unsupported)
1283 if (dump_enabled_p ())
1285 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1286 vect_location,
1287 "unsupported unaligned load ");
1288 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1289 first_load, 0);
1290 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1292 return false;
1297 /* We are done, no actual permutations need to be generated. */
1298 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1299 SLP_TREE_LOAD_PERMUTATION (node).release ();
1300 return true;
1303 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1304 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1305 well (unless it's reduction). */
1306 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1307 return false;
1308 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1309 if (!node->load_permutation.exists ())
1310 return false;
/* Each node must read one distinct element (all its permutation entries
   equal), and together the nodes must cover all GROUP_SIZE elements.  */
1312 load_index = sbitmap_alloc (group_size);
1313 bitmap_clear (load_index);
1314 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1316 unsigned int lidx = node->load_permutation[0];
1317 if (bitmap_bit_p (load_index, lidx))
1319 sbitmap_free (load_index);
1320 return false;
1322 bitmap_set_bit (load_index, lidx);
1323 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1324 if (k != lidx)
1326 sbitmap_free (load_index);
1327 return false;
1330 for (i = 0; i < group_size; i++)
1331 if (!bitmap_bit_p (load_index, i))
1333 sbitmap_free (load_index);
1334 return false;
1336 sbitmap_free (load_index);
/* Finally do a dry run (analyze_only = true) of the permutation code
   generation to make sure the target can actually perform it.  */
1338 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1339 if (node->load_permutation.exists ()
1340 && !vect_transform_slp_perm_load
1341 (node, vNULL, NULL,
1342 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1343 return false;
1344 return true;
1348 /* Find the first load in the loop that belongs to INSTANCE.
1349 When loads are in several SLP nodes, there can be a case in which the first
1350 load does not appear in the first SLP node to be transformed, causing
1351 incorrect order of statements. Since we generate all the loads together,
1352 they must be inserted before the first load of the SLP instance and not
1353 before the first load of the first node of the instance. */
1355 static gimple
1356 vect_find_first_load_in_slp_instance (slp_instance instance)
1358 int i, j;
1359 slp_tree load_node;
1360 gimple first_load = NULL, load;
/* Scan every scalar load of every load node, keeping the earliest stmt.  */
1362 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1363 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1364 first_load = get_earlier_stmt (load, first_load);
1366 return first_load;
1370 /* Find the last store in SLP INSTANCE. */
1372 static gimple
1373 vect_find_last_store_in_slp_instance (slp_instance instance)
1375 int i;
1376 slp_tree node;
1377 gimple last_store = NULL, store;
/* The stores live in the root node of the instance; keep the latest.  */
1379 node = SLP_INSTANCE_TREE (instance);
1380 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1381 last_store = get_later_stmt (store, last_store);
1383 return last_store;
1386 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.
   Body costs are accumulated into the instance's body cost vector;
   prologue costs for constant/external operands go into
   PROLOGUE_COST_VEC.  NCOPIES_FOR_COST is the number of vector stmts
   that will be generated per scalar stmt.  */
1388 static void
1389 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1390 slp_instance instance, slp_tree node,
1391 stmt_vector_for_cost *prologue_cost_vec,
1392 unsigned ncopies_for_cost)
1394 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1396 unsigned i;
1397 slp_tree child;
1398 gimple stmt, s;
1399 stmt_vec_info stmt_info;
1400 tree lhs;
1401 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1403 /* Recurse down the SLP tree. */
1404 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1405 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1406 instance, child, prologue_cost_vec,
1407 ncopies_for_cost);
1409 /* Look at the first scalar stmt to determine the cost. */
1410 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1411 stmt_info = vinfo_for_stmt (stmt);
1412 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1414 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1415 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1416 vect_uninitialized_def,
1417 node, prologue_cost_vec, body_cost_vec)
1418 else
/* NOTE(review): this inner 'int i' shadows the outer 'unsigned i'
   used for the child recursion and the operand scan below.  */
1420 int i;
1421 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1422 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1423 node, prologue_cost_vec, body_cost_vec);
1424 /* If the load is permuted record the cost for the permutation.
1425 ??? Loads from multiple chains are let through here only
1426 for a single special case involving complex numbers where
1427 in the end no permutation is necessary. */
1428 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1429 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1430 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1431 && vect_get_place_in_interleaving_chain
1432 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1434 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1435 stmt_info, 0, vect_body);
1436 break;
1440 else
1441 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1442 stmt_info, 0, vect_body);
1444 /* Scan operands and account for prologue cost of constants/externals.
1445 ??? This over-estimates cost for multiple uses and should be
1446 re-engineered. */
1447 lhs = gimple_get_lhs (stmt);
1448 for (i = 0; i < gimple_num_ops (stmt); ++i)
1450 tree def, op = gimple_op (stmt, i);
1451 gimple def_stmt;
1452 enum vect_def_type dt;
1453 if (!op || op == lhs)
1454 continue;
1455 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1456 &def_stmt, &def, &dt)
1457 && (dt == vect_constant_def || dt == vect_external_def))
1458 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1459 stmt_info, 0, vect_prologue);
1463 /* Compute the cost for the SLP instance INSTANCE. */
1465 static void
1466 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1467 slp_instance instance, unsigned nunits)
1469 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1470 unsigned ncopies_for_cost;
1471 stmt_info_for_cost *si;
1472 unsigned i;
1474 /* Calculate the number of vector stmts to create based on the unrolling
1475 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1476 GROUP_SIZE / NUNITS otherwise). */
1477 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1478 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1480 prologue_cost_vec.create (10);
1481 body_cost_vec.create (10);
1482 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1483 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1484 instance, SLP_INSTANCE_TREE (instance),
1485 &prologue_cost_vec, ncopies_for_cost);
1487 /* Record the prologue costs, which were delayed until we were
1488 sure that SLP was successful. Unlike the body costs, we know
1489 the final values now regardless of the loop vectorization factor. */
1490 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1491 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1492 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1494 struct _stmt_vec_info *stmt_info
1495 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1496 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1497 si->misalign, vect_prologue);
/* The body cost vector stays attached to the instance; only the local
   prologue vector is released here.  */
1500 prologue_cost_vec.release ();
1503 /* Analyze an SLP instance starting from a group of grouped stores. Call
1504 vect_build_slp_tree to build a tree of packed stmts if possible.
1505 Return FALSE if it's impossible to SLP any stmt in the loop. */
1507 static bool
1508 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1509 gimple stmt, unsigned max_tree_size)
1511 slp_instance new_instance;
1512 slp_tree node;
1513 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1514 unsigned int unrolling_factor = 1, nunits;
1515 tree vectype, scalar_type = NULL_TREE;
1516 gimple next;
1517 unsigned int vectorization_factor = 0;
1518 int i;
1519 unsigned int max_nunits = 0;
1520 vec<slp_tree> loads;
1521 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1522 vec<gimple> scalar_stmts;
/* Determine the vector type and group size: from the data-ref for a
   grouped access, otherwise (reduction group) from the stmt's vectype.  */
1524 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1526 if (dr)
1528 scalar_type = TREE_TYPE (DR_REF (dr));
1529 vectype = get_vectype_for_scalar_type (scalar_type);
1531 else
1533 gcc_assert (loop_vinfo);
1534 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1537 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1539 else
1541 gcc_assert (loop_vinfo);
1542 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1543 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1546 if (!vectype)
1548 if (dump_enabled_p ())
1550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1551 "Build SLP failed: unsupported data-type ");
1552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1553 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1556 return false;
1559 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1560 if (loop_vinfo)
1561 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1562 else
1563 vectorization_factor = nunits;
1565 /* Calculate the unrolling factor. */
1566 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1567 if (unrolling_factor != 1 && !loop_vinfo)
1569 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1571 "Build SLP failed: unrolling required in basic"
1572 " block SLP\n");
1574 return false;
1577 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1578 scalar_stmts.create (group_size);
1579 next = stmt;
1580 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1582 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1583 while (next)
/* Prefer the pattern stmt over the original when one exists.  */
1585 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1586 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1587 scalar_stmts.safe_push (
1588 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1589 else
1590 scalar_stmts.safe_push (next);
1591 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1594 else
1596 /* Collect reduction statements. */
1597 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1598 for (i = 0; reductions.iterate (i, &next); i++)
1599 scalar_stmts.safe_push (next);
1602 node = vect_create_new_slp_node (scalar_stmts);
1604 loads.create (group_size);
1606 /* Build the tree for the SLP instance. */
1607 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1608 &max_nunits, &loads,
1609 vectorization_factor, NULL, NULL, NULL,
1610 max_tree_size))
1612 /* Calculate the unrolling factor based on the smallest type. */
1613 if (max_nunits > nunits)
1614 unrolling_factor = least_common_multiple (max_nunits, group_size)
1615 / group_size;
1617 if (unrolling_factor != 1 && !loop_vinfo)
1619 if (dump_enabled_p ())
1620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1621 "Build SLP failed: unrolling required in basic"
1622 " block SLP\n");
1623 vect_free_slp_tree (node);
1624 loads.release ();
1625 return false;
1628 /* Create a new SLP instance. */
1629 new_instance = XNEW (struct _slp_instance);
1630 SLP_INSTANCE_TREE (new_instance) = node;
1631 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1632 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1633 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1634 SLP_INSTANCE_LOADS (new_instance) = loads;
1635 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1637 /* Compute the load permutation. */
1638 slp_tree load_node;
1639 bool loads_permuted = false;
1640 FOR_EACH_VEC_ELT (loads, i, load_node)
1642 vec<unsigned> load_permutation;
1643 int j;
1644 gimple load, first_stmt;
1645 bool this_load_permuted = false;
1646 load_permutation.create (group_size);
1647 first_stmt = GROUP_FIRST_ELEMENT
1648 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1649 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1651 int load_place
1652 = vect_get_place_in_interleaving_chain (load, first_stmt);
1653 gcc_assert (load_place != -1);
1654 if (load_place != j)
1655 this_load_permuted = true;
1656 load_permutation.safe_push (load_place);
/* Identity permutations are dropped to keep the common case cheap.  */
1658 if (!this_load_permuted)
1660 load_permutation.release ();
1661 continue;
1663 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1664 loads_permuted = true;
1667 if (loads_permuted)
1669 if (!vect_supported_load_permutation_p (new_instance))
1671 if (dump_enabled_p ())
1673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1674 "Build SLP failed: unsupported load "
1675 "permutation ");
1676 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1677 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1679 vect_free_slp_instance (new_instance);
1680 return false;
1683 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1684 = vect_find_first_load_in_slp_instance (new_instance);
1687 /* Compute the costs of this SLP instance. */
1688 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1689 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1691 if (loop_vinfo)
1692 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1693 else
1694 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1696 if (dump_enabled_p ())
1697 vect_print_slp_tree (MSG_NOTE, node);
1699 return true;
1702 /* Failed to SLP. */
1703 /* Free the allocated memory. */
1704 vect_free_slp_tree (node);
1705 loads.release ();
1707 return false;
1711 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1712 trees of packed scalar stmts if SLP is possible. */
1714 bool
1715 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1716 unsigned max_tree_size)
1718 unsigned int i;
1719 vec<gimple> grouped_stores;
1720 vec<gimple> reductions = vNULL;
1721 vec<gimple> reduc_chains = vNULL;
1722 gimple first_element;
1723 bool ok = false;
1725 if (dump_enabled_p ())
1726 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1728 if (loop_vinfo)
1730 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1731 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1732 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1734 else
1735 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1737 /* Find SLP sequences starting from groups of grouped stores. */
1738 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1739 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1740 max_tree_size))
1741 ok = true;
/* For basic-block vectorization at least one store group must SLP.  */
1743 if (bb_vinfo && !ok)
1745 if (dump_enabled_p ())
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1747 "Failed to SLP the basic block.\n");
1749 return false;
1752 if (loop_vinfo
1753 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1755 /* Find SLP sequences starting from reduction chains. */
1756 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1757 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1758 max_tree_size))
1759 ok = true;
1760 else
1761 return false;
1763 /* Don't try to vectorize SLP reductions if reduction chain was
1764 detected. */
1765 return ok;
1768 /* Find SLP sequences starting from groups of reductions. */
1769 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1770 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1771 max_tree_size))
1772 ok = true;
/* For loop vectorization failing to find SLP opportunities is not an
   error, so return true unconditionally here.  */
1774 return true;
1778 /* For each possible SLP instance decide whether to SLP it and calculate overall
1779 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1780 least one instance. */
1782 bool
1783 vect_make_slp_decision (loop_vec_info loop_vinfo)
1785 unsigned int i, unrolling_factor = 1;
1786 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1787 slp_instance instance;
1788 int decided_to_slp = 0;
1790 if (dump_enabled_p ())
1791 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1792 "\n");
1794 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1796 /* FORNOW: SLP if you can. */
/* The loop's SLP unrolling factor is the maximum over all instances.  */
1797 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1798 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1800 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1801 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1802 loop-based vectorization. Such stmts will be marked as HYBRID. */
1803 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1804 decided_to_slp++;
1807 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1809 if (decided_to_slp && dump_enabled_p ())
1810 dump_printf_loc (MSG_NOTE, vect_location,
1811 "Decided to SLP %d instances. Unrolling factor %d\n",
1812 decided_to_slp, unrolling_factor);
1814 return (decided_to_slp > 0);
1818 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1819 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1821 static void
1822 vect_detect_hybrid_slp_stmts (slp_tree node)
1824 int i;
1825 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1826 gimple stmt = stmts[0];
1827 imm_use_iterator imm_iter;
1828 gimple use_stmt;
1829 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1830 slp_tree child;
1831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1832 struct loop *loop = NULL;
1833 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1834 basic_block bb = NULL;
1836 if (!node)
1837 return;
1839 if (loop_vinfo)
1840 loop = LOOP_VINFO_LOOP (loop_vinfo);
1841 else
1842 bb = BB_VINFO_BB (bb_vinfo);
/* A pure-SLP stmt whose SSA def is used inside the loop/bb by a relevant
   non-SLP stmt must also be vectorized by the loop vectorizer: mark the
   whole node as hybrid.  The reduction PHI case is exempt.  */
1844 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1845 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1846 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1847 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1848 if (gimple_bb (use_stmt)
1849 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1850 || bb == gimple_bb (use_stmt))
1851 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1852 && !STMT_SLP_TYPE (stmt_vinfo)
1853 && (STMT_VINFO_RELEVANT (stmt_vinfo)
1854 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))
1855 || (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
1856 && STMT_VINFO_RELATED_STMT (stmt_vinfo)
1857 && !STMT_SLP_TYPE (vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo)))))
1858 && !(gimple_code (use_stmt) == GIMPLE_PHI
1859 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1860 == vect_reduction_def))
1861 vect_mark_slp_stmts (node, hybrid, i);
1863 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1864 vect_detect_hybrid_slp_stmts (child);
1868 /* Find stmts that must be both vectorized and SLPed. */
1870 void
1871 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1873 unsigned int i;
1874 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1875 slp_instance instance;
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1879 "\n");
/* Walk every SLP instance tree and mark hybrid stmts.  */
1881 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1882 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1886 /* Create and initialize a new bb_vec_info struct for BB, as well as
1887 stmt_vec_info structs for all the stmts in it. */
1889 static bb_vec_info
1890 new_bb_vec_info (basic_block bb)
1892 bb_vec_info res = NULL;
1893 gimple_stmt_iterator gsi;
1895 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1896 BB_VINFO_BB (res) = bb;
1898 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1900 gimple stmt = gsi_stmt (gsi);
/* Reset the uid and attach a fresh stmt_vec_info to each stmt.  */
1901 gimple_set_uid (stmt, 0);
1902 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1905 BB_VINFO_GROUPED_STORES (res).create (10);
1906 BB_VINFO_SLP_INSTANCES (res).create (2);
1907 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
/* Stash the info on the block so it can be retrieved later via bb->aux.  */
1909 bb->aux = res;
1910 return res;
1914 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1915 stmts in the basic block. */
1917 static void
1918 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1920 vec<slp_instance> slp_instances;
1921 slp_instance instance;
1922 basic_block bb;
1923 gimple_stmt_iterator si;
1924 unsigned i;
1926 if (!bb_vinfo)
1927 return;
1929 bb = BB_VINFO_BB (bb_vinfo);
1931 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1933 gimple stmt = gsi_stmt (si);
1934 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1936 if (stmt_info)
1937 /* Free stmt_vec_info. */
1938 free_stmt_vec_info (stmt);
/* Release datarefs, dependences, store groups, SLP instances and the
   target cost data before freeing the info itself.  */
1941 vect_destroy_datarefs (NULL, bb_vinfo);
1942 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1943 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1944 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1945 FOR_EACH_VEC_ELT (slp_instances, i, instance)
1946 vect_free_slp_instance (instance);
1947 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1948 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1949 free (bb_vinfo);
1950 bb->aux = NULL;
1954 /* Analyze statements contained in SLP tree node after recursively analyzing
1955 the subtree. Return TRUE if the operations are supported. */
1957 static bool
1958 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1960 bool dummy;
1961 int i;
1962 gimple stmt;
1963 slp_tree child;
1965 if (!node)
1966 return true;
/* Children first: an unsupported child makes the whole node unsupported.  */
1968 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1969 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1970 return false;
1972 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1974 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1975 gcc_assert (stmt_info);
1976 gcc_assert (PURE_SLP_STMT (stmt_info));
1978 if (!vect_analyze_stmt (stmt, &dummy, node))
1979 return false;
1982 return true;
1986 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
1987 operations are supported. */
1989 static bool
1990 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1992 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1993 slp_instance instance;
1994 int i;
/* Remove unsupported instances in place; note i is only advanced when the
   current instance is kept.  */
1996 for (i = 0; slp_instances.iterate (i, &instance); )
1998 if (!vect_slp_analyze_node_operations (bb_vinfo,
1999 SLP_INSTANCE_TREE (instance)))
2001 vect_free_slp_instance (instance);
2002 slp_instances.ordered_remove (i);
2004 else
2005 i++;
/* Vectorization is only worthwhile if at least one instance survived.  */
2008 if (!slp_instances.length ())
2009 return false;
2011 return true;
2015 /* Compute the scalar cost of the SLP node NODE and its children
2016 and return it. Do not account defs that are marked in LIFE and
2017 update LIFE according to uses of NODE. */
2019 static unsigned
2020 vect_bb_slp_scalar_cost (basic_block bb,
2021 slp_tree node, vec<bool, va_heap> *life)
2023 unsigned scalar_cost = 0;
2024 unsigned i;
2025 gimple stmt;
2026 slp_tree child;
2028 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2030 unsigned stmt_cost;
2031 ssa_op_iter op_iter;
2032 def_operand_p def_p;
2033 stmt_vec_info stmt_info;
2035 if ((*life)[i])
2036 continue;
2038 /* If there is a non-vectorized use of the defs then the scalar
2039 stmt is kept live in which case we do not account it or any
2040 required defs in the SLP children in the scalar cost. This
2041 way we make the vectorization more costly when compared to
2042 the scalar cost. */
2043 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2045 imm_use_iterator use_iter;
2046 gimple use_stmt;
2047 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2048 if (!is_gimple_debug (use_stmt)
2049 && (gimple_code (use_stmt) == GIMPLE_PHI
2050 || gimple_bb (use_stmt) != bb
2051 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2053 (*life)[i] = true;
2054 BREAK_FROM_IMM_USE_STMT (use_iter);
2057 if ((*life)[i])
2058 continue;
/* Charge a load, store or plain stmt cost depending on the data-ref.  */
2060 stmt_info = vinfo_for_stmt (stmt);
2061 if (STMT_VINFO_DATA_REF (stmt_info))
2063 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2064 stmt_cost = vect_get_stmt_cost (scalar_load);
2065 else
2066 stmt_cost = vect_get_stmt_cost (scalar_store);
2068 else
2069 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2071 scalar_cost += stmt_cost;
/* Recurse into the children, sharing the same LIFE vector.  */
2074 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2075 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2077 return scalar_cost;
2080 /* Check if vectorization of the basic block is profitable. */
2082 static bool
2083 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2085 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2086 slp_instance instance;
2087 int i, j;
2088 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2089 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2090 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2091 stmt_vec_info stmt_info = NULL;
2092 stmt_vector_for_cost body_cost_vec;
2093 stmt_info_for_cost *ci;
2095 /* Calculate vector costs. */
2096 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2098 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2100 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2102 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2103 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2104 stmt_info, ci->misalign, vect_body);
2108 /* Calculate scalar cost. */
2109 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2111 auto_vec<bool, 20> life;
2112 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2113 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2114 SLP_INSTANCE_TREE (instance),
2115 &life);
2118 /* Complete the target-specific cost calculation. */
2119 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2120 &vec_inside_cost, &vec_epilogue_cost);
2122 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2124 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2127 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2128 vec_inside_cost);
2129 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2130 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2131 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2134 /* Vectorization is profitable if its cost is less than the cost of scalar
2135 version. */
2136 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2137 return false;
2139 return true;
2142 /* Check if the basic block can be vectorized.  Returns a freshly built
   bb_vec_info on success, NULL otherwise (any partially built info is
   destroyed before returning NULL). */
2144 static bb_vec_info
2145 vect_slp_analyze_bb_1 (basic_block bb)
2147 bb_vec_info bb_vinfo;
2148 vec<slp_instance> slp_instances;
2149 slp_instance instance;
2150 int i;
2151 int min_vf = 2;
2152 unsigned n_stmts = 0;
2154 bb_vinfo = new_bb_vec_info (bb);
2155 if (!bb_vinfo)
2156 return NULL;
2158 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2162 "not vectorized: unhandled data-ref in basic "
2163 "block.\n");
2165 destroy_bb_vec_info (bb_vinfo);
2166 return NULL;
2169 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2171 if (dump_enabled_p ())
2172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2173 "not vectorized: not enough data-refs in "
2174 "basic block.\n");
2176 destroy_bb_vec_info (bb_vinfo);
2177 return NULL;
2180 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184 "not vectorized: unhandled data access in "
2185 "basic block.\n");
2187 destroy_bb_vec_info (bb_vinfo);
2188 return NULL;
2191 vect_pattern_recog (NULL, bb_vinfo);
2193 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2195 if (dump_enabled_p ())
2196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2197 "not vectorized: bad data alignment in basic "
2198 "block.\n");
2200 destroy_bb_vec_info (bb_vinfo);
2201 return NULL;
2204 /* Check the SLP opportunities in the basic block, analyze and build SLP
2205 trees. */
2206 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2208 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2210 "not vectorized: failed to find SLP opportunities "
2211 "in basic block.\n");
2213 destroy_bb_vec_info (bb_vinfo);
2214 return NULL;
2217 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2219 /* Mark all the statements that we want to vectorize as pure SLP and
2220 relevant. */
2221 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2223 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2224 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2227 /* Mark all the statements that we do not want to vectorize. */
2228 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2229 !gsi_end_p (gsi); gsi_next (&gsi))
2231 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2232 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2233 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2236 /* Analyze dependences. At this point all stmts not participating in
2237 vectorization have to be marked. Dependence analysis assumes
2238 that we either vectorize all SLP instances or none at all. */
2239 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "not vectorized: unhandled data dependence "
2244 "in basic block.\n");
2246 destroy_bb_vec_info (bb_vinfo);
2247 return NULL;
2250 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "not vectorized: unsupported alignment in basic "
2255 "block.\n");
2256 destroy_bb_vec_info (bb_vinfo);
2257 return NULL;
2260 if (!vect_slp_analyze_operations (bb_vinfo))
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2264 "not vectorized: bad operation in basic block.\n");
2266 destroy_bb_vec_info (bb_vinfo);
2267 return NULL;
2270 /* Cost model: check if the vectorization is worthwhile. */
2271 if (!unlimited_cost_model (NULL)
2272 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "not vectorized: vectorization is not "
2277 "profitable.\n");
2279 destroy_bb_vec_info (bb_vinfo);
2280 return NULL;
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_NOTE, vect_location,
2285 "Basic block will be vectorized using SLP\n");
2287 return bb_vinfo;
2291 bb_vec_info
2292 vect_slp_analyze_bb (basic_block bb)
2294 bb_vec_info bb_vinfo;
2295 int insns = 0;
2296 gimple_stmt_iterator gsi;
2297 unsigned int vector_sizes;
2299 if (dump_enabled_p ())
2300 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2302 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2304 gimple stmt = gsi_stmt (gsi);
2305 if (!is_gimple_debug (stmt)
2306 && !gimple_nop_p (stmt)
2307 && gimple_code (stmt) != GIMPLE_LABEL)
2308 insns++;
2311 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "not vectorized: too many instructions in "
2316 "basic block.\n");
2318 return NULL;
2321 /* Autodetect first vector size we try. */
2322 current_vector_size = 0;
2323 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2325 while (1)
2327 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2328 if (bb_vinfo)
2329 return bb_vinfo;
2331 destroy_bb_vec_info (bb_vinfo);
2333 vector_sizes &= ~current_vector_size;
2334 if (vector_sizes == 0
2335 || current_vector_size == 0)
2336 return NULL;
2338 /* Try the next biggest vector size. */
2339 current_vector_size = 1 << floor_log2 (vector_sizes);
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE, vect_location,
2342 "***** Re-trying analysis with "
2343 "vector size %d\n", current_vector_size);
2348 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2349 the number of created vector stmts depends on the unrolling factor).
2350 However, the actual number of vector stmts for every SLP node depends on
2351 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2352 should be updated. In this function we assume that the inside costs
2353 calculated in vect_model_xxx_cost are linear in ncopies. */
2355 void
2356 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2358 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2359 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2360 slp_instance instance;
2361 stmt_vector_for_cost body_cost_vec;
2362 stmt_info_for_cost *si;
2363 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE, vect_location,
2367 "=== vect_update_slp_costs_according_to_vf ===\n");
2369 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2371 /* We assume that costs are linear in ncopies. */
2372 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2374 /* Record the instance's instructions in the target cost model.
2375 This was delayed until here because the count of instructions
2376 isn't known beforehand. */
2377 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2379 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2380 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2381 vinfo_for_stmt (si->stmt), si->misalign,
2382 vect_body);
2387 /* For constant and loop invariant defs of SLP_NODE this function returns
2388 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2389 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2390 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2391 REDUC_INDEX is the index of the reduction operand in the statements, unless
2392 it is -1. */
2394 static void
2395 vect_get_constant_vectors (tree op, slp_tree slp_node,
2396 vec<tree> *vec_oprnds,
2397 unsigned int op_num, unsigned int number_of_vectors,
2398 int reduc_index)
2400 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2401 gimple stmt = stmts[0];
2402 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2403 unsigned nunits;
2404 tree vec_cst;
2405 tree *elts;
2406 unsigned j, number_of_places_left_in_vector;
2407 tree vector_type;
2408 tree vop;
2409 int group_size = stmts.length ();
2410 unsigned int vec_num, i;
2411 unsigned number_of_copies = 1;
2412 vec<tree> voprnds;
2413 voprnds.create (number_of_vectors);
2414 bool constant_p, is_store;
2415 tree neutral_op = NULL;
2416 enum tree_code code = gimple_expr_code (stmt);
2417 gimple def_stmt;
2418 struct loop *loop;
2419 gimple_seq ctor_seq = NULL;
/* For a reduction, the invariant operand is the one at REDUC_INDEX;
   re-point OP/OP_NUM at it and compute a neutral element used to pad
   additional unrolled copies below.  */
2421 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2422 && reduc_index != -1)
2424 op_num = reduc_index - 1;
2425 op = gimple_op (stmt, reduc_index);
2426 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2427 we need either neutral operands or the original operands. See
2428 get_initial_def_for_reduction() for details. */
2429 switch (code)
2431 case WIDEN_SUM_EXPR:
2432 case DOT_PROD_EXPR:
2433 case PLUS_EXPR:
2434 case MINUS_EXPR:
2435 case BIT_IOR_EXPR:
2436 case BIT_XOR_EXPR:
2437 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2438 neutral_op = build_real (TREE_TYPE (op), dconst0)
2440 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2442 break;
2444 case MULT_EXPR:
2445 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2446 neutral_op = build_real (TREE_TYPE (op), dconst1);
2447 else
2448 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2450 break;
2452 case BIT_AND_EXPR:
2453 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2454 break;
2456 /* For MIN/MAX we don't have an easy neutral operand but
2457 the initial values can be used fine here. Only for
2458 a reduction chain we have to force a neutral element. */
2459 case MAX_EXPR:
2460 case MIN_EXPR:
2461 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2462 neutral_op = NULL;
2463 else
2465 def_stmt = SSA_NAME_DEF_STMT (op);
2466 loop = (gimple_bb (stmt))->loop_father;
2467 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2468 loop_preheader_edge (loop));
2470 break;
2472 default:
2473 neutral_op = NULL;
/* A grouped store's invariant operand is always the stored value (RHS).  */
2477 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2479 is_store = true;
2480 op = gimple_assign_rhs1 (stmt);
2482 else
2483 is_store = false;
2485 gcc_assert (op);
2487 if (CONSTANT_CLASS_P (op))
2488 constant_p = true;
2489 else
2490 constant_p = false;
2492 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2493 gcc_assert (vector_type);
2494 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2496 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2497 created vectors. It is greater than 1 if unrolling is performed.
2499 For example, we have two scalar operands, s1 and s2 (e.g., group of
2500 strided accesses of size two), while NUNITS is four (i.e., four scalars
2501 of this type can be packed in a vector). The output vector will contain
2502 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2503 will be 2).
2505 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2506 containing the operands.
2508 For example, NUNITS is four as before, and the group size is 8
2509 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2510 {s5, s6, s7, s8}. */
2512 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2514 number_of_places_left_in_vector = nunits;
2515 elts = XALLOCAVEC (tree, nunits);
2516 for (j = 0; j < number_of_copies; j++)
/* Fill ELTS backwards (from the last scalar stmt to the first) so each
   finished vector lists operands in scalar order; the completed vectors
   are collected in reverse and re-inverted after the loop.  */
2518 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2520 if (is_store)
2521 op = gimple_assign_rhs1 (stmt);
2522 else
2524 switch (code)
2526 case COND_EXPR:
2527 if (op_num == 0 || op_num == 1)
2529 tree cond = gimple_assign_rhs1 (stmt);
2530 op = TREE_OPERAND (cond, op_num);
2532 else
2534 if (op_num == 2)
2535 op = gimple_assign_rhs2 (stmt);
2536 else
2537 op = gimple_assign_rhs3 (stmt);
2539 break;
2541 case CALL_EXPR:
2542 op = gimple_call_arg (stmt, op_num);
2543 break;
2545 case LSHIFT_EXPR:
2546 case RSHIFT_EXPR:
2547 case LROTATE_EXPR:
2548 case RROTATE_EXPR:
2549 op = gimple_op (stmt, op_num + 1);
2550 /* Unlike the other binary operators, shifts/rotates have
2551 the shift count being int, instead of the same type as
2552 the lhs, so make sure the scalar is the right type if
2553 we are dealing with vectors of
2554 long long/long/short/char. */
2555 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2556 op = fold_convert (TREE_TYPE (vector_type), op);
2557 break;
2559 default:
2560 op = gimple_op (stmt, op_num + 1);
2561 break;
2565 if (reduc_index != -1)
2567 loop = (gimple_bb (stmt))->loop_father;
2568 def_stmt = SSA_NAME_DEF_STMT (op);
2570 gcc_assert (loop);
2572 /* Get the def before the loop. In reduction chain we have only
2573 one initial value. */
2574 if ((j != (number_of_copies - 1)
2575 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2576 && i != 0))
2577 && neutral_op)
2578 op = neutral_op;
2579 else
2580 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2581 loop_preheader_edge (loop));
2584 /* Create 'vect_ = {op0,op1,...,opn}'. */
2585 number_of_places_left_in_vector--;
/* If the scalar's type differs from the vector element type,
   reinterpret its bits with VIEW_CONVERT_EXPR; non-constants need an
   auxiliary stmt, queued on CTOR_SEQ and emitted below.  */
2586 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2588 if (CONSTANT_CLASS_P (op))
2590 op = fold_unary (VIEW_CONVERT_EXPR,
2591 TREE_TYPE (vector_type), op);
2592 gcc_assert (op && CONSTANT_CLASS_P (op));
2594 else
2596 tree new_temp
2597 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2598 gimple init_stmt;
2599 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2600 op);
2601 init_stmt
2602 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2603 new_temp, op, NULL_TREE);
2604 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2605 op = new_temp;
2608 elts[number_of_places_left_in_vector] = op;
2609 if (!CONSTANT_CLASS_P (op))
2610 constant_p = false;
/* A vector is full: materialize it as a VECTOR_CST if every element was
   constant, otherwise as a CONSTRUCTOR.  */
2612 if (number_of_places_left_in_vector == 0)
2614 number_of_places_left_in_vector = nunits;
2616 if (constant_p)
2617 vec_cst = build_vector (vector_type, elts);
2618 else
2620 vec<constructor_elt, va_gc> *v;
2621 unsigned k;
2622 vec_alloc (v, nunits);
2623 for (k = 0; k < nunits; ++k)
2624 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2625 vec_cst = build_constructor (vector_type, v);
2627 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2628 vector_type, NULL));
/* Emit any queued conversion stmts right before the vector def.  */
2629 if (ctor_seq != NULL)
2631 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2632 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2633 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2634 GSI_SAME_STMT);
2635 ctor_seq = NULL;
2641 /* Since the vectors are created in the reverse order, we should invert
2642 them. */
2643 vec_num = voprnds.length ();
2644 for (j = vec_num; j != 0; j--)
2646 vop = voprnds[j - 1];
2647 vec_oprnds->quick_push (vop);
2650 voprnds.release ();
2652 /* In case that VF is greater than the unrolling factor needed for the SLP
2653 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2654 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2655 to replicate the vectors. */
2656 while (number_of_vectors > vec_oprnds->length ())
2658 tree neutral_vec = NULL;
2660 if (neutral_op)
2662 if (!neutral_vec)
2663 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2665 vec_oprnds->quick_push (neutral_vec);
2667 else
2669 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2670 vec_oprnds->quick_push (vop);
2676 /* Get vectorized definitions from SLP_NODE that contains corresponding
2677 vectorized def-stmts. */
2679 static void
2680 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2682 tree vec_oprnd;
2683 gimple vec_def_stmt;
2684 unsigned int i;
2686 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2688 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2690 gcc_assert (vec_def_stmt);
2691 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2692 vec_oprnds->quick_push (vec_oprnd);
2697 /* Get vectorized definitions for SLP_NODE.
2698 If the scalar definitions are loop invariants or constants, collect them and
2699 call vect_get_constant_vectors() to create vector stmts.
2700 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2701 must be stored in the corresponding child of SLP_NODE, and we call
2702 vect_get_slp_vect_defs () to retrieve them. */
2704 void
2705 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2706 vec<vec<tree> > *vec_oprnds, int reduc_index)
2708 gimple first_stmt;
2709 int number_of_vects = 0, i;
2710 unsigned int child_index = 0;
2711 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2712 slp_tree child = NULL;
2713 vec<tree> vec_defs;
2714 tree oprnd;
2715 bool vectorized_defs;
2717 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2718 FOR_EACH_VEC_ELT (ops, i, oprnd)
2720 /* For each operand we check if it has vectorized definitions in a child
2721 node or we need to create them (for invariants and constants). We
2722 check if the LHS of the first stmt of the next child matches OPRND.
2723 If it does, we found the correct child. Otherwise, we call
2724 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2725 to check this child node for the next operand. */
2726 vectorized_defs = false;
2727 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2729 child = SLP_TREE_CHILDREN (slp_node)[child_index];
2731 /* We have to check both pattern and original def, if available. */
2732 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2733 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2735 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2736 || (related
2737 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2739 /* The number of vector defs is determined by the number of
2740 vector statements in the node from which we get those
2741 statements. */
2742 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2743 vectorized_defs = true;
2744 child_index++;
2748 if (!vectorized_defs)
2750 if (i == 0)
2752 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2753 /* Number of vector stmts was calculated according to LHS in
2754 vect_schedule_slp_instance (), fix it by replacing LHS with
2755 RHS, if necessary. See vect_get_smallest_scalar_type () for
2756 details. */
2757 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2758 &rhs_size_unit);
2759 if (rhs_size_unit != lhs_size_unit)
2761 number_of_vects *= rhs_size_unit;
2762 number_of_vects /= lhs_size_unit;
2767 /* Allocate memory for vectorized defs. */
2768 vec_defs = vNULL;
2769 vec_defs.create (number_of_vects);
2771 /* For reduction defs we call vect_get_constant_vectors (), since we are
2772 looking for initial loop invariant values. */
2773 if (vectorized_defs && reduc_index == -1)
2774 /* The defs are already vectorized. */
2775 vect_get_slp_vect_defs (child, &vec_defs);
2776 else
2777 /* Build vectors from scalar defs. */
2778 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2779 number_of_vects, reduc_index);
/* VEC_DEFS (and its storage) is handed over to the caller here.  */
2781 vec_oprnds->quick_push (vec_defs);
2783 /* For reductions, we only need initial values. */
/* NOTE: this returns from within the FOR_EACH_VEC_ELT loop, so for
   reductions only the first operand's defs are collected.  */
2784 if (reduc_index != -1)
2785 return;
2790 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2791 building a vector of type MASK_TYPE from it) and two input vectors placed in
2792 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2793 shifting by STRIDE elements of DR_CHAIN for every copy.
2794 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2795 copies).
2796 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2797 the created stmts must be inserted. */
2799 static inline void
2800 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2801 tree mask, int first_vec_indx, int second_vec_indx,
2802 gimple_stmt_iterator *gsi, slp_tree node,
2803 tree vectype, vec<tree> dr_chain,
2804 int ncopies, int vect_stmts_counter)
2806 tree perm_dest;
2807 gimple perm_stmt = NULL;
2808 stmt_vec_info next_stmt_info;
2809 int i, stride;
2810 tree first_vec, second_vec, data_ref;
2812 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2814 /* Initialize the vect stmts of NODE to properly insert the generated
2815 stmts later. */
2816 for (i = SLP_TREE_VEC_STMTS (node).length ();
2817 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2818 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2820 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2821 for (i = 0; i < ncopies; i++)
2823 first_vec = dr_chain[first_vec_indx];
2824 second_vec = dr_chain[second_vec_indx];
2826 /* Generate the permute statement. */
2827 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2828 first_vec, second_vec, mask);
2829 data_ref = make_ssa_name (perm_dest, perm_stmt);
2830 gimple_set_lhs (perm_stmt, data_ref);
2831 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2833 /* Store the vector statement in NODE. */
2834 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2836 first_vec_indx += stride;
2837 second_vec_indx += stride;
2840 /* Mark the scalar stmt as vectorized. */
2841 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2842 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2846 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2847 return in CURRENT_MASK_ELEMENT its equivalent in target specific
2848 representation. Check that the mask is valid and return FALSE if not.
2849 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2850 the next vector, i.e., the current first vector is not needed. */
/* MASK holds the INDEX elements of the mask built so far;
   NUMBER_OF_MASK_FIXES, MASK_FIXED and NEEDS_FIRST_VECTOR carry state
   across calls while a single mask is being filled in.  */
2852 static bool
2853 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2854 int mask_nunits, bool only_one_vec, int index,
2855 unsigned char *mask, int *current_mask_element,
2856 bool *need_next_vector, int *number_of_mask_fixes,
2857 bool *mask_fixed, bool *needs_first_vector)
2859 int i;
2861 /* Convert to target specific representation. */
2862 *current_mask_element = first_mask_element + m;
2863 /* Adjust the value in case it's a mask for second and third vectors. */
2864 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2866 if (*current_mask_element < mask_nunits)
2867 *needs_first_vector = true;
2869 /* We have only one input vector to permute but the mask accesses values in
2870 the next vector as well. */
2871 if (only_one_vec && *current_mask_element >= mask_nunits)
2873 if (dump_enabled_p ())
2875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2876 "permutation requires at least two vectors ");
2877 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2878 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2881 return false;
2884 /* The mask requires the next vector. */
2885 if (*current_mask_element >= mask_nunits * 2)
2887 if (*needs_first_vector || *mask_fixed)
2889 /* We either need the first vector too or have already moved to the
2890 next vector. In both cases, this permutation needs three
2891 vectors. */
2892 if (dump_enabled_p ())
2894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2895 "permutation requires at "
2896 "least three vectors ");
2897 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2898 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2901 return false;
2904 /* We move to the next vector, dropping the first one and working with
2905 the second and the third - we need to adjust the values of the mask
2906 accordingly. */
2907 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
/* The already-filled mask elements must be shifted the same way.  */
2909 for (i = 0; i < index; i++)
2910 mask[i] -= mask_nunits * *number_of_mask_fixes;
2912 (*number_of_mask_fixes)++;
2913 *mask_fixed = true;
2916 *need_next_vector = *mask_fixed;
2918 /* This was the last element of this mask. Start a new one. */
2919 if (index == mask_nunits - 1)
2921 *number_of_mask_fixes = 1;
2922 *mask_fixed = false;
2923 *needs_first_vector = false;
2926 return true;
2930 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2931 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2932 permute statements for the SLP node NODE of the SLP instance
2933 SLP_NODE_INSTANCE. */
2935 bool
2936 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2937 gimple_stmt_iterator *gsi, int vf,
2938 slp_instance slp_node_instance, bool analyze_only)
2940 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2942 tree mask_element_type = NULL_TREE, mask_type;
2943 int i, j, k, nunits, vec_index = 0, scalar_index;
2944 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2945 gimple next_scalar_stmt;
2946 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2947 int first_mask_element;
2948 int index, unroll_factor, current_mask_element, ncopies;
2949 unsigned char *mask;
2950 bool only_one_vec = false, need_next_vector = false;
2951 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2952 int number_of_mask_fixes = 1;
2953 bool mask_fixed = false;
2954 bool needs_first_vector = false;
2955 enum machine_mode mode;
2957 mode = TYPE_MODE (vectype);
/* Bail out early if the target has no vector permute support at all
   for this mode.  */
2959 if (!can_vec_perm_p (mode, false, NULL))
2961 if (dump_enabled_p ())
2963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2964 "no vect permute for ");
2965 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2966 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2968 return false;
2971 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2972 same size as the vector element being permuted. */
2973 mask_element_type = lang_hooks.types.type_for_mode
2974 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2975 mask_type = get_vectype_for_scalar_type (mask_element_type);
2976 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2977 mask = XALLOCAVEC (unsigned char, nunits);
2978 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2980 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2981 unrolling factor. */
2982 orig_vec_stmts_num = group_size *
2983 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2984 if (orig_vec_stmts_num == 1)
2985 only_one_vec = true;
2987 /* Number of copies is determined by the final vectorization factor
2988 relatively to SLP_NODE_INSTANCE unrolling factor. */
2989 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2991 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2992 return false;
2994 /* Generate permutation masks for every NODE. Number of masks for each NODE
2995 is equal to GROUP_SIZE.
2996 E.g., we have a group of three nodes with three loads from the same
2997 location in each node, and the vector size is 4. I.e., we have a
2998 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2999 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3000 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3003 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3004 The last mask is illegal since we assume two operands for permute
3005 operation, and the mask element values can't be outside that range.
3006 Hence, the last mask must be converted into {2,5,5,5}.
3007 For the first two permutations we need the first and the second input
3008 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3009 we need the second and the third vectors: {b1,c1,a2,b2} and
3010 {c2,a3,b3,c3}. */
/* Build the masks one element at a time from the node's load
   permutation; whenever a mask fills up (NUNITS elements) validate it
   and, unless ANALYZE_ONLY, emit the permute statement.  */
3013 scalar_index = 0;
3014 index = 0;
3015 vect_stmts_counter = 0;
3016 vec_index = 0;
3017 first_vec_index = vec_index++;
3018 if (only_one_vec)
3019 second_vec_index = first_vec_index;
3020 else
3021 second_vec_index = vec_index++;
3023 for (j = 0; j < unroll_factor; j++)
3025 for (k = 0; k < group_size; k++)
3027 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3028 first_mask_element = i + j * group_size;
3029 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3030 nunits, only_one_vec, index,
3031 mask, &current_mask_element,
3032 &need_next_vector,
3033 &number_of_mask_fixes, &mask_fixed,
3034 &needs_first_vector))
3035 return false;
3036 mask[index++] = current_mask_element;
3038 if (index == nunits)
3040 index = 0;
3041 if (!can_vec_perm_p (mode, false, mask))
3043 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3046 vect_location,
3047 "unsupported vect permute { ");
3048 for (i = 0; i < nunits; ++i)
3049 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3050 mask[i]);
3051 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3053 return false;
3056 if (!analyze_only)
3058 int l;
3059 tree mask_vec, *mask_elts;
3060 mask_elts = XALLOCAVEC (tree, nunits);
3061 for (l = 0; l < nunits; ++l)
3062 mask_elts[l] = build_int_cst (mask_element_type,
3063 mask[l]);
3064 mask_vec = build_vector (mask_type, mask_elts);
/* If the previous mask consumed the first input vector, slide the
   window of input vectors forward by one.  */
3066 if (need_next_vector)
3068 first_vec_index = second_vec_index;
3069 second_vec_index = vec_index;
3072 next_scalar_stmt
3073 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3075 vect_create_mask_and_perm (stmt, next_scalar_stmt,
3076 mask_vec, first_vec_index, second_vec_index,
3077 gsi, node, vectype, dr_chain,
3078 ncopies, vect_stmts_counter++);
3085 return true;
3090 /* Vectorize SLP instance tree in postorder. */
3092 static bool
3093 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3094 unsigned int vectorization_factor)
3096 gimple stmt;
3097 bool grouped_store, is_store;
3098 gimple_stmt_iterator si;
3099 stmt_vec_info stmt_info;
3100 unsigned int vec_stmts_size, nunits, group_size;
3101 tree vectype;
3102 int i;
3103 slp_tree child;
3105 if (!node)
3106 return false;
3108 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3109 vect_schedule_slp_instance (child, instance, vectorization_factor);
3111 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3112 stmt_info = vinfo_for_stmt (stmt);
3114 /* VECTYPE is the type of the destination. */
3115 vectype = STMT_VINFO_VECTYPE (stmt_info);
3116 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3117 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3119 /* For each SLP instance calculate number of vector stmts to be created
3120 for the scalar stmts in each node of the SLP tree. Number of vector
3121 elements in one vector iteration is the number of scalar elements in
3122 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3123 size. */
3124 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3126 if (!SLP_TREE_VEC_STMTS (node).exists ())
3128 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3129 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3132 if (dump_enabled_p ())
3134 dump_printf_loc (MSG_NOTE,vect_location,
3135 "------>vectorizing SLP node starting from: ");
3136 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3137 dump_printf (MSG_NOTE, "\n");
3140 /* Loads should be inserted before the first load. */
3141 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3142 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3143 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3144 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3145 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3146 else if (is_pattern_stmt_p (stmt_info))
3147 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3148 else
3149 si = gsi_for_stmt (stmt);
3151 /* Stores should be inserted just before the last store. */
3152 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3153 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3155 gimple last_store = vect_find_last_store_in_slp_instance (instance);
3156 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3157 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3158 si = gsi_for_stmt (last_store);
3161 /* Mark the first element of the reduction chain as reduction to properly
3162 transform the node. In the analysis phase only the last element of the
3163 chain is marked as reduction. */
3164 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3165 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3167 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3168 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3171 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3172 return is_store;
3175 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3176 For loop vectorization this is done in vectorizable_call, but for SLP
3177 it needs to be deferred until end of vect_schedule_slp, because multiple
3178 SLP instances may refer to the same scalar stmt. */
3180 static void
3181 vect_remove_slp_scalar_calls (slp_tree node)
3183 gimple stmt, new_stmt;
3184 gimple_stmt_iterator gsi;
3185 int i;
3186 slp_tree child;
3187 tree lhs;
3188 stmt_vec_info stmt_info;
3190 if (!node)
3191 return;
3193 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3194 vect_remove_slp_scalar_calls (child);
3196 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3198 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3199 continue;
3200 stmt_info = vinfo_for_stmt (stmt);
3201 if (stmt_info == NULL
3202 || is_pattern_stmt_p (stmt_info)
3203 || !PURE_SLP_STMT (stmt_info))
3204 continue;
3205 lhs = gimple_call_lhs (stmt);
3206 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3207 set_vinfo_for_stmt (new_stmt, stmt_info);
3208 set_vinfo_for_stmt (stmt, NULL);
3209 STMT_VINFO_STMT (stmt_info) = new_stmt;
3210 gsi = gsi_for_stmt (stmt);
3211 gsi_replace (&gsi, new_stmt, false);
3212 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3216 /* Generate vector code for all SLP instances in the loop/basic block. */
3218 bool
3219 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3221 vec<slp_instance> slp_instances;
3222 slp_instance instance;
3223 unsigned int i, vf;
3224 bool is_store = false;
3226 if (loop_vinfo)
3228 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3229 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3231 else
3233 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3234 vf = 1;
3237 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3239 /* Schedule the tree of INSTANCE. */
3240 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3241 instance, vf);
3242 if (dump_enabled_p ())
3243 dump_printf_loc (MSG_NOTE, vect_location,
3244 "vectorizing stmts using SLP.\n");
3247 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3249 slp_tree root = SLP_INSTANCE_TREE (instance);
3250 gimple store;
3251 unsigned int j;
3252 gimple_stmt_iterator gsi;
3254 /* Remove scalar call stmts. Do not do this for basic-block
3255 vectorization as not all uses may be vectorized.
3256 ??? Why should this be necessary? DCE should be able to
3257 remove the stmts itself.
3258 ??? For BB vectorization we can as well remove scalar
3259 stmts starting from the SLP tree root if they have no
3260 uses. */
3261 if (loop_vinfo)
3262 vect_remove_slp_scalar_calls (root);
3264 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3265 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3267 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3268 break;
3270 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3271 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3272 /* Free the attached stmt_vec_info and remove the stmt. */
3273 gsi = gsi_for_stmt (store);
3274 unlink_stmt_vdef (store);
3275 gsi_remove (&gsi, true);
3276 release_defs (store);
3277 free_stmt_vec_info (store);
3281 return is_store;
3285 /* Vectorize the basic block. */
3287 void
3288 vect_slp_transform_bb (basic_block bb)
3290 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3291 gimple_stmt_iterator si;
3293 gcc_assert (bb_vinfo);
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3298 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3300 gimple stmt = gsi_stmt (si);
3301 stmt_vec_info stmt_info;
3303 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_NOTE, vect_location,
3306 "------>SLPing statement: ");
3307 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3308 dump_printf (MSG_NOTE, "\n");
3311 stmt_info = vinfo_for_stmt (stmt);
3312 gcc_assert (stmt_info);
3314 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3315 if (STMT_SLP_TYPE (stmt_info))
3317 vect_schedule_slp (NULL, bb_vinfo);
3318 break;
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_NOTE, vect_location,
3324 "BASIC BLOCK VECTORIZED\n");
3326 destroy_bb_vec_info (bb_vinfo);