/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfglayout.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
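
/* Note: the vect_relevant values are ordered, so the update above keeps the
   strongest relevance seen so far for the stmt; the stmt is pushed on the
   worklist only when its relevance or liveness actually changed, which is
   what lets the propagation loop in vect_mark_stmts_to_be_vectorized reach
   a fixed point.  */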
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  return operand == use;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }
  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}
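
/* To illustrate case 3a above: if the inner-loop user STMT is
   vect_used_in_outer, its outer-loop DEF_STMT is marked vect_used_in_scope;
   if STMT is so far unused in its scope, the outer-loop def only becomes
   relevant when STMT is part of a nested cycle.  */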
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example, stmts that
   only compute loop-control values or array addresses do not need to be
   vectorized, because loop control and addressing of vectorized data-refs
   are handled differently.

   This pass detects such stmts.  */
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt, phi;
  unsigned int i;
  basic_block bb;
  bool live_p;
  stmt_vec_info stmt_vinfo;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }
  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }
          break;

        default:
          break;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return TARG_SCALAR_LOAD_COST;
    case store_vec_info_type:
      return TARG_SCALAR_STORE_COST;
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return TARG_SCALAR_STMT_COST;
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * TARG_VEC_STMT_COST;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += TARG_SCALAR_TO_VEC_COST;
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
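
/* A quick sanity check of the model above: with ncopies = 2 and one constant
   operand, inside_cost is 2 * TARG_VEC_STMT_COST and outside_cost is
   TARG_SCALAR_TO_VEC_COST, the one-off cost of broadcasting the constant
   into a vector before the loop.  */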
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = TARG_SCALAR_TO_VEC_COST;

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
    group_size = vect_cost_strided_group_size (stmt_info);
  /* Not a strided access.  */
  else
    group_size = 1;

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  inside_cost += ncopies * TARG_VEC_STORE_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
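
/* For example, a store group of group_size = 4 with ncopies = 1 is charged
   1 * log2(4) * 4 = 8 vector stmts for the high/low interleaves, plus
   1 * TARG_VEC_STORE_COST for the store itself.  */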
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  int alignment_support_cheme;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  alignment_support_cheme = vect_supportable_dr_alignment (first_dr);

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * TARG_VEC_STMT_COST;

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  switch (alignment_support_cheme)
    {
    case dr_aligned:
      {
        inside_cost += ncopies * TARG_VEC_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        inside_cost += ncopies * (2 * TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          inside_cost += TARG_VEC_STMT_COST;

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
          {
            outside_cost = 2 * TARG_VEC_STMT_COST;
            if (targetm.vectorize.builtin_mask_for_load)
              outside_cost += TARG_VEC_STMT_COST;
          }

        inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);

        break;
      }
    default:
      gcc_unreachable ();
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
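
/* Rough reading of the switch above: an aligned load costs
   TARG_VEC_LOAD_COST per copy, a hardware-supported unaligned load costs
   TARG_VEC_UNALIGNED_LOAD_COST per copy, and the explicit realignment
   schemes charge extra vector stmts per copy, with the address/mask
   "priming" of the optimized scheme charged outside the loop.  */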
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
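
/* vect_init_vector is how invariant and constant operands get materialized
   as vectors: the vect_constant_def and vect_external_def cases of
   vect_get_vec_def_for_operand below build the element list and call it
   with GSI == NULL so that the initialization lands outside the loop.  */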
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, op, t);

        vec_cst = build_vector (vector_type, t);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, def, t);

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
        vec_oprnd = PHI_RESULT (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
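
/* For instance, if OP is the integer constant 3 and the stmt's vectype has
   four elements, case 1 above builds the vector constant {3,3,3,3} and
   returns the SSA name set by the init_stmt that vect_init_vector places in
   the preheader.  */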
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

static tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (fndecl == NULL_TREE
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                         vectype_in);
}
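
/* The builtin_vectorized_function hook is what lets e.g. calls to math
   builtins be vectorized: when the target provides a vector variant for the
   given input/output vector types, the returned decl is used by
   vectorizable_call below to build the vectorized call.  */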
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than two arguments, we
     do not have interesting builtin functions to vectorize with
     more than two arguments.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 2)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }

  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
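
  /* So, roughly: when the output vector has twice as many (narrower)
     elements as the input vector - e.g. a V4SI argument producing a V8HI
     result - the call is classified as NARROW and the transform below passes
     two vector operands per scalar argument; the symmetric case is WIDEN
     (not implemented by any target here), and matching element counts need
     no modifier (NONE).  */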
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
  type = TREE_TYPE (scalar_dest);
  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                  fold_convert (type, integer_zero_node));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, j;
  tree rhs_type;
  tree builtin_decl;
  enum { NARROW, NONE, WIDEN } modifier;
  int i;
  VEC(tree,heap) *vec_oprnds0 = NULL;
  tree vop0;
  VEC(tree,heap) *dummy = NULL;
  int dummy_int;

  /* Is STMT a vectorizable conversion?  */

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
    return false;

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* FORNOW: SLP with multiple types is not supported.  The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */
  if (slp_node)
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  if ((modifier == NONE
       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, vectype_out, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          builtin_decl =
            targetm.vectorize.builtin_conversion (code,
                                                  vectype_out, vectype_in);
          for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
            {
              /* Arguments are ready.  create the new vector stmt.  */
              new_stmt = gimple_build_call (builtin_decl, 1, vop0);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;
    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if (CONVERT_EXPR_CODE_P (code)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  create the new vector stmt.  */
      for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
        {
          if (CONVERT_EXPR_CODE_P (code))
            vop = build1_stat (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  bool scalar_shift_arg = false;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
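
  /* A note on the shift/rotate handling below: when the shift amount is
     loop-invariant or constant, we first look for a vector-shifted-by-scalar
     optab (recording this in scalar_shift_arg) and only then fall back to
     the vector-shifted-by-vector form, converting a constant shift amount to
     the vector element type in that case.  */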
  /* If this is a shift/rotate, determine whether the shift amount is a vector,
     or scalar.  If the shift/rotate amount is a vector, use the vector/vector
     shift optabs.  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      /* vector shifted by vector */
      if (dt[1] == vect_internal_def)
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/vector shift/rotate found.");
        }

      /* See if the machine has a vector shifted by scalar insn and if not
         then see if it has a vector shifted by vector insn */
      else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
        {
          optab = optab_for_tree_code (code, vectype, optab_scalar);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = true;
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/scalar shift/rotate found.");
            }
          else
            {
              optab = optab_for_tree_code (code, vectype, optab_vector);
              if (optab
                  && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
                      != CODE_FOR_nothing))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "vector/vector shift/rotate found.");

                  /* Unlike the other binary operators, shifts/rotates have
                     the rhs being int, instead of the same type as the lhs,
                     so make sure the scalar is the right type if we are
                     dealing with vectors of short/char.  */
                  if (dt[1] == vect_constant_def)
                    op1 = fold_convert (TREE_TYPE (vectype), op1);
                }
            }
        }
2125 if (vect_print_dump_info (REPORT_DETAILS
))
2126 fprintf (vect_dump
, "operand mode requires invariant argument.");
2131 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
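  /* Summary note (added for clarity): at this point OPTAB names the
     candidate instruction pattern -- chosen above from the vector/vector or
     vector/scalar shift optabs for shifts and rotates, and from
     optab_default for every other unary or binary code.  */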
2133 /* Supportable by target? */
2136 if (vect_print_dump_info (REPORT_DETAILS
))
2137 fprintf (vect_dump
, "no optab.");
2140 vec_mode
= TYPE_MODE (vectype
);
2141 icode
= (int) optab_handler (optab
, vec_mode
)->insn_code
;
2142 if (icode
== CODE_FOR_nothing
)
2144 if (vect_print_dump_info (REPORT_DETAILS
))
2145 fprintf (vect_dump
, "op not supported by target.");
2146 /* Check only during analysis. */
2147 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
2148 || (vf
< vect_min_worthwhile_factor (code
)
2151 if (vect_print_dump_info (REPORT_DETAILS
))
2152 fprintf (vect_dump
, "proceeding using word mode.");
2155 /* Worthwhile without SIMD support? Check only during analysis. */
2156 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2157 && vf
< vect_min_worthwhile_factor (code
)
2160 if (vect_print_dump_info (REPORT_DETAILS
))
2161 fprintf (vect_dump
, "not worthwhile without SIMD support.");
2165 if (!vec_stmt
) /* transformation not required. */
2167 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
2168 if (vect_print_dump_info (REPORT_DETAILS
))
2169 fprintf (vect_dump
, "=== vectorizable_operation ===");
2170 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
2176 if (vect_print_dump_info (REPORT_DETAILS
))
2177 fprintf (vect_dump
, "transform binary/unary operation.");
2180 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2182 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2183 created in the previous stages of the recursion, so no allocation is
2184 needed, except for the case of shift with scalar shift argument. In that
2185 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2186 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2187 In case of loop-based vectorization we allocate VECs of size 1. We
2188 allocate VEC_OPRNDS1 only in case of binary operation. */
2191 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2192 if (op_type
== binary_op
)
2193 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2195 else if (scalar_shift_arg
)
2196 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
2198 /* In case the vectorization factor (VF) is bigger than the number
2199 of elements that we can fit in a vectype (nunits), we have to generate
2200 more than one vector stmt - i.e - we need to "unroll" the
2201 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2202 from one copy of the vector stmt to the next, in the field
2203 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2204 stages to find the correct vector defs to be used when vectorizing
2205 stmts that use the defs of the current stmt. The example below illustrates
2206 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2207 4 vectorized stmts):
2209 before vectorization:
2210 RELATED_STMT VEC_STMT
2214 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2216 RELATED_STMT VEC_STMT
2217 VS1_0: vx0 = memref0 VS1_1 -
2218 VS1_1: vx1 = memref1 VS1_2 -
2219 VS1_2: vx2 = memref2 VS1_3 -
2220 VS1_3: vx3 = memref3 - -
2221 S1: x = load - VS1_0
2224 step2: vectorize stmt S2 (done here):
2225 To vectorize stmt S2 we first need to find the relevant vector
2226 def for the first operand 'x'. This is, as usual, obtained from
2227 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2228 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2229 relevant vector def 'vx0'. Having found 'vx0' we can generate
2230 the vector stmt VS2_0, and as usual, record it in the
2231 STMT_VINFO_VEC_STMT of stmt S2.
2232 When creating the second copy (VS2_1), we obtain the relevant vector
2233 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2234 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2235 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2236 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2237 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2238 chain of stmts and pointers:
2239 RELATED_STMT VEC_STMT
2240 VS1_0: vx0 = memref0 VS1_1 -
2241 VS1_1: vx1 = memref1 VS1_2 -
2242 VS1_2: vx2 = memref2 VS1_3 -
2243 VS1_3: vx3 = memref3 - -
2244 S1: x = load - VS1_0
2245 VS2_0: vz0 = vx0 + v1 VS2_1 -
2246 VS2_1: vz1 = vx1 + v1 VS2_2 -
2247 VS2_2: vz2 = vx2 + v1 VS2_3 -
2248 VS2_3: vz3 = vx3 + v1 - -
2249 S2: z = x + 1 - VS2_0 */
2251 prev_stmt_info
= NULL
;
2252 for (j
= 0; j
< ncopies
; j
++)
2257 if (op_type
== binary_op
&& scalar_shift_arg
)
2259 /* Vector shl and shr insn patterns can be defined with scalar
2260 operand 2 (shift operand). In this case, use constant or loop
2261 invariant op1 directly, without extending it to vector mode
2263 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
2264 if (!VECTOR_MODE_P (optab_op2_mode
))
2266 if (vect_print_dump_info (REPORT_DETAILS
))
2267 fprintf (vect_dump
, "operand 1 using scalar mode.");
2269 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2272 /* Store vec_oprnd1 for every vector stmt to be created
2273 for SLP_NODE. We check during the analysis that all the
2274 shift arguments are the same.
2275 TODO: Allow different constants for different vector
2276 stmts generated for an SLP instance. */
2277 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2278 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2283 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2284 (a special case for certain kind of vector shifts); otherwise,
2285 operand 1 should be of a vector type (the usual case). */
2286 if (op_type
== binary_op
&& !vec_oprnd1
)
2287 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
2290 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2294 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
2296 /* Arguments are ready. Create the new vector stmt. */
2297 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vop0
); i
++)
2299 vop1
= ((op_type
== binary_op
)
2300 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL
);
2301 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2302 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2303 gimple_assign_set_lhs (new_stmt
, new_temp
);
2304 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2306 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2313 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2315 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2316 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2319 VEC_free (tree
, heap
, vec_oprnds0
);
2321 VEC_free (tree
, heap
, vec_oprnds1
);
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
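/* Clarifying note (added): each invocation pushes two vector defs into
   VEC_OPRNDS before the recursive call above decrements MULTI_STEP_CVT, so a
   top-level call with MULTI_STEP_CVT == n collects 2 * (n + 1) defs -- which
   matches the vect_pow2-based allocation done by the caller in
   vectorizable_type_demotion.  */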
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
  tree vop0, vop1, new_tmp, vec_dest;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2384 /* Create demotion operation. */
2385 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
2386 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
2387 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2388 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2389 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2390 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2393 /* Store the resulting vector for next recursive call. */
2394 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
2397 /* This is the last step of the conversion sequence. Store the
2398 vectors in SLP_NODE or in vector info of the scalar statement
2399 (or in STMT_VINFO_RELATED_STMT chain). */
2401 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2404 if (!*prev_stmt_info
)
2405 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2407 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2409 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operations to the destination
     type.  */

      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2423 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2424 stmt
, vec_dsts
, gsi
, slp_node
,
2425 code
, prev_stmt_info
);
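      /* Clarifying note (added): each recursion level above combines pairs
         of vectors, so the number of live entries in VEC_OPRNDS is halved at
         every step until only the final narrowed vectors remain.  */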
/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
                            gimple *vec_stmt, slp_tree slp_node)
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_oprnds0 = NULL;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
  tree last_oprnd, intermediate_type;
2463 /* FORNOW: not supported by basic block SLP vectorization. */
2464 gcc_assert (loop_vinfo
);
2466 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2469 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2472 /* Is STMT a vectorizable type-demotion operation? */
2473 if (!is_gimple_assign (stmt
))
2476 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2479 code
= gimple_assign_rhs_code (stmt
);
2480 if (!CONVERT_EXPR_CODE_P (code
))
2483 scalar_dest
= gimple_assign_lhs (stmt
);
2484 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2486 /* Check the operands of the operation. */
2487 op0
= gimple_assign_rhs1 (stmt
);
2488 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2489 && INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2490 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest
))
2491 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0
))
2492 && CONVERT_EXPR_CODE_P (code
))))
2494 if (!vect_is_simple_use_1 (op0
, loop_vinfo
, NULL
,
2495 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2497 if (vect_print_dump_info (REPORT_DETAILS
))
2498 fprintf (vect_dump
, "use not simple.");
2501 /* If op0 is an external def use a vector type with the
2502 same size as the output vector type if possible. */
2504 vectype_in
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
2506 gcc_assert (vectype_in
);
2509 if (vect_print_dump_info (REPORT_DETAILS
))
2511 fprintf (vect_dump
, "no vectype for scalar type ");
2512 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
2518 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2519 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2520 if (nunits_in
>= nunits_out
)
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;

  gcc_assert (ncopies >= 1);
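  /* Illustrative example (added, assuming 128-bit vectors): narrowing int to
     short gives nunits_in == 4 (V4SI) and nunits_out == 8 (V8HI); with
     VF == 8 this yields ncopies == 1, and each vectorized statement packs
     two V4SI inputs into one V8HI result.  */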
2532 /* Supportable by target? */
2533 if (!supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2534 &code1
, &multi_step_cvt
, &interm_types
))
2537 if (!vec_stmt
) /* transformation not required. */
2539 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2540 if (vect_print_dump_info (REPORT_DETAILS
))
2541 fprintf (vect_dump
, "=== vectorizable_demotion ===");
2542 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
2547 if (vect_print_dump_info (REPORT_DETAILS
))
2548 fprintf (vect_dump
, "transform type demotion operation. ncopies = %d.",
  /* In case of multi-step demotion, we first generate demotion operations to
     the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_narrowing_operation, and store them in the correct order
     for future use in vect_create_vectorized_demotion_stmts().  */
2557 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2559 vec_dsts
= VEC_alloc (tree
, heap
, 1);
2561 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2562 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2566 for (i
= VEC_length (tree
, interm_types
) - 1;
2567 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2569 vec_dest
= vect_create_destination_var (scalar_dest
,
2571 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */
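  /* Illustrative note (added, assuming 128-bit vectors): a multi-step
     narrowing such as int -> char is performed as int -> short -> char,
     i.e. four V4SI operands are first packed into two V8HI vectors and
     those into a single V16QI result per final statement.  */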
2580 prev_stmt_info
= NULL
;
2581 for (j
= 0; j
< ncopies
; j
++)
2585 vect_get_slp_defs (slp_node
, &vec_oprnds0
, NULL
, -1);
2588 VEC_free (tree
, heap
, vec_oprnds0
);
2589 vec_oprnds0
= VEC_alloc (tree
, heap
,
2590 (multi_step_cvt
? vect_pow2 (multi_step_cvt
) * 2 : 2));
2591 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2592 vect_pow2 (multi_step_cvt
) - 1);
2595 /* Arguments are ready. Create the new vector stmts. */
2596 tmp_vec_dsts
= VEC_copy (tree
, heap
, vec_dsts
);
2597 vect_create_vectorized_demotion_stmts (&vec_oprnds0
,
2598 multi_step_cvt
, stmt
, tmp_vec_dsts
,
2599 gsi
, slp_node
, code1
,
2603 VEC_free (tree
, heap
, vec_oprnds0
);
2604 VEC_free (tree
, heap
, vec_dsts
);
2605 VEC_free (tree
, heap
, tmp_vec_dsts
);
2606 VEC_free (tree
, heap
, interm_types
);
2608 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        int multi_step_cvt, gimple stmt,
                                        VEC (tree, heap) *vec_dsts,
                                        gimple_stmt_iterator *gsi,
                                        slp_tree slp_node, enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type,
                                        stmt_vec_info *prev_stmt_info)
  tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
  gimple new_stmt1, new_stmt2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  VEC (tree, heap) *vec_tmp;

  vec_dest = VEC_pop (tree, vec_dsts);
  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2637 for (i
= 0; VEC_iterate (tree
, *vec_oprnds0
, i
, vop0
); i
++)
2639 if (op_type
== binary_op
)
2640 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
2644 /* Generate the two halves of promotion operation. */
2645 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2646 op_type
, vec_dest
, gsi
, stmt
);
2647 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2648 op_type
, vec_dest
, gsi
, stmt
);
2649 if (is_gimple_call (new_stmt1
))
2651 new_tmp1
= gimple_call_lhs (new_stmt1
);
2652 new_tmp2
= gimple_call_lhs (new_stmt2
);
2656 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2657 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2662 /* Store the results for the recursive call. */
2663 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
2664 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
  /* Last step of the promotion sequence - store the results.  */
2671 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt1
);
2672 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt2
);
2676 if (!*prev_stmt_info
)
2677 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt1
;
2679 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt1
;
2681 *prev_stmt_info
= vinfo_for_stmt (new_stmt1
);
2682 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt2
;
2683 *prev_stmt_info
= vinfo_for_stmt (new_stmt2
);
      /* For a multi-step promotion operation we call the function recursively
         for every stage.  We start from the input type, create promotion
         operations to the intermediate types, and then create promotions
         to the output type.  */
2694 *vec_oprnds0
= VEC_copy (tree
, heap
, vec_tmp
);
2695 VEC_free (tree
, heap
, vec_tmp
);
2696 vect_create_vectorized_promotion_stmts (vec_oprnds0
, vec_oprnds1
,
2697 multi_step_cvt
- 1, stmt
,
2698 vec_dsts
, gsi
, slp_node
, code1
,
2699 code2
, decl2
, decl2
, op_type
,
/* Function vectorizable_type_promotion

   Check if STMT performs a binary or unary operation that involves
   type promotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
                             gimple *vec_stmt, slp_tree slp_node)
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  tree intermediate_type = NULL_TREE;
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2741 /* FORNOW: not supported by basic block SLP vectorization. */
2742 gcc_assert (loop_vinfo
);
2744 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2747 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2750 /* Is STMT a vectorizable type-promotion operation? */
2751 if (!is_gimple_assign (stmt
))
2754 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2757 code
= gimple_assign_rhs_code (stmt
);
2758 if (!CONVERT_EXPR_CODE_P (code
)
2759 && code
!= WIDEN_MULT_EXPR
)
2762 scalar_dest
= gimple_assign_lhs (stmt
);
2763 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2765 /* Check the operands of the operation. */
2766 op0
= gimple_assign_rhs1 (stmt
);
2767 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2768 && INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2769 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest
))
2770 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0
))
2771 && CONVERT_EXPR_CODE_P (code
))))
2773 if (!vect_is_simple_use_1 (op0
, loop_vinfo
, NULL
,
2774 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2776 if (vect_print_dump_info (REPORT_DETAILS
))
2777 fprintf (vect_dump
, "use not simple.");
2780 /* If op0 is an external or constant def use a vector type with
2781 the same size as the output vector type. */
2783 vectype_in
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
2785 gcc_assert (vectype_in
);
2788 if (vect_print_dump_info (REPORT_DETAILS
))
2790 fprintf (vect_dump
, "no vectype for scalar type ");
2791 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
2797 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2798 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2799 if (nunits_in
<= nunits_out
)
2802 /* Multiple types in SLP are handled by creating the appropriate number of
2803 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2808 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2810 gcc_assert (ncopies
>= 1);
2812 op_type
= TREE_CODE_LENGTH (code
);
2813 if (op_type
== binary_op
)
2815 op1
= gimple_assign_rhs2 (stmt
);
2816 if (!vect_is_simple_use (op1
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
[1]))
2818 if (vect_print_dump_info (REPORT_DETAILS
))
2819 fprintf (vect_dump
, "use not simple.");
2824 /* Supportable by target? */
2825 if (!supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2826 &decl1
, &decl2
, &code1
, &code2
,
2827 &multi_step_cvt
, &interm_types
))
  /* Binary widening operation can only be supported directly by the
     architecture.  */
  gcc_assert (!(multi_step_cvt && op_type == binary_op));
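  /* Clarifying note (added): a binary widening operation such as
     WIDEN_MULT_EXPR must map directly to a pair of target instructions;
     only unary conversions are allowed to go through intermediate types
     (multi-step promotion) here, which is what the assertion enforces.  */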
2834 if (!vec_stmt
) /* transformation not required. */
2836 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2837 if (vect_print_dump_info (REPORT_DETAILS
))
2838 fprintf (vect_dump
, "=== vectorizable_promotion ===");
2839 vect_model_simple_cost (stmt_info
, 2*ncopies
, dt
, NULL
);
2845 if (vect_print_dump_info (REPORT_DETAILS
))
2846 fprintf (vect_dump
, "transform type promotion operation. ncopies = %d.",
  /* In case of multi-step promotion, we first generate promotion operations
     to the intermediate types, and then from those types to the final one.
     We store the vector destinations in VEC_DSTS in the correct order for
     recursive creation of promotion operations in
     vect_create_vectorized_promotion_stmts().  Vector destinations are created
     according to TYPES received from supportable_widening_operation().  */
2857 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2859 vec_dsts
= VEC_alloc (tree
, heap
, 1);
2861 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2862 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2866 for (i
= VEC_length (tree
, interm_types
) - 1;
2867 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2869 vec_dest
= vect_create_destination_var (scalar_dest
,
2871 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2877 vec_oprnds0
= VEC_alloc (tree
, heap
,
2878 (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
2879 if (op_type
== binary_op
)
2880 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2883 /* In case the vectorization factor (VF) is bigger than the number
2884 of elements that we can fit in a vectype (nunits), we have to generate
2885 more than one vector stmt - i.e - we need to "unroll" the
2886 vector stmt by a factor VF/nunits. */
2888 prev_stmt_info
= NULL
;
2889 for (j
= 0; j
< ncopies
; j
++)
2895 vect_get_slp_defs (slp_node
, &vec_oprnds0
, &vec_oprnds1
, -1);
2898 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2899 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2900 if (op_type
== binary_op
)
2902 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
2903 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2909 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2910 VEC_replace (tree
, vec_oprnds0
, 0, vec_oprnd0
);
2911 if (op_type
== binary_op
)
2913 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd1
);
2914 VEC_replace (tree
, vec_oprnds1
, 0, vec_oprnd1
);
2918 /* Arguments are ready. Create the new vector stmts. */
2919 tmp_vec_dsts
= VEC_copy (tree
, heap
, vec_dsts
);
2920 vect_create_vectorized_promotion_stmts (&vec_oprnds0
, &vec_oprnds1
,
2921 multi_step_cvt
, stmt
,
2923 gsi
, slp_node
, code1
, code2
,
2924 decl1
, decl2
, op_type
,
2928 VEC_free (tree
, heap
, vec_dsts
);
2929 VEC_free (tree
, heap
, tmp_vec_dsts
);
2930 VEC_free (tree
, heap
, interm_types
);
2931 VEC_free (tree
, heap
, vec_oprnds0
);
2932 VEC_free (tree
, heap
, vec_oprnds1
);
2934 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
2954 tree vec_oprnd
= NULL_TREE
;
2955 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2956 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
2957 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2958 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2959 struct loop
*loop
= NULL
;
2960 enum machine_mode vec_mode
;
2962 enum dr_alignment_support alignment_support_scheme
;
2965 enum vect_def_type dt
;
2966 stmt_vec_info prev_stmt_info
= NULL
;
2967 tree dataref_ptr
= NULL_TREE
;
2968 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2971 gimple next_stmt
, first_stmt
= NULL
;
2972 bool strided_store
= false;
2973 unsigned int group_size
, i
;
2974 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
2976 VEC(tree
,heap
) *vec_oprnds
= NULL
;
2977 bool slp
= (slp_node
!= NULL
);
2978 unsigned int vec_num
;
2979 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2982 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2984 /* Multiple types in SLP are handled by creating the appropriate number of
2985 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2990 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2992 gcc_assert (ncopies
>= 1);
2994 /* FORNOW. This restriction should be relaxed. */
2995 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
2997 if (vect_print_dump_info (REPORT_DETAILS
))
2998 fprintf (vect_dump
, "multiple types in nested loop.");
3002 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3005 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3008 /* Is vectorizable store? */
3010 if (!is_gimple_assign (stmt
))
3013 scalar_dest
= gimple_assign_lhs (stmt
);
3014 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3015 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3016 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3017 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3018 && TREE_CODE (scalar_dest
) != REALPART_EXPR
)
3021 gcc_assert (gimple_assign_single_p (stmt
));
3022 op
= gimple_assign_rhs1 (stmt
);
3023 if (!vect_is_simple_use (op
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
, &dt
))
3025 if (vect_print_dump_info (REPORT_DETAILS
))
3026 fprintf (vect_dump
, "use not simple.");
3030 /* The scalar rhs type needs to be trivially convertible to the vector
3031 component type. This should always be the case. */
3032 if (!useless_type_conversion_p (TREE_TYPE (vectype
), TREE_TYPE (op
)))
3034 if (vect_print_dump_info (REPORT_DETAILS
))
3035 fprintf (vect_dump
, "??? operands of different types");
3039 vec_mode
= TYPE_MODE (vectype
);
3040 /* FORNOW. In some cases can vectorize even if data-type not supported
3041 (e.g. - array initialization with 0). */
3042 if (optab_handler (mov_optab
, (int)vec_mode
)->insn_code
== CODE_FOR_nothing
)
3045 if (!STMT_VINFO_DATA_REF (stmt_info
))
3048 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
3050 strided_store
= true;
3051 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3052 if (!vect_strided_store_supported (vectype
)
3053 && !PURE_SLP_STMT (stmt_info
) && !slp
)
3056 if (first_stmt
== stmt
)
3058 /* STMT is the leader of the group. Check the operands of all the
3059 stmts of the group. */
3060 next_stmt
= DR_GROUP_NEXT_DR (stmt_info
);
3063 gcc_assert (gimple_assign_single_p (next_stmt
));
3064 op
= gimple_assign_rhs1 (next_stmt
);
3065 if (!vect_is_simple_use (op
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3068 if (vect_print_dump_info (REPORT_DETAILS
))
3069 fprintf (vect_dump
, "use not simple.");
3072 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3077 if (!vec_stmt
) /* transformation not required. */
3079 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3080 vect_model_store_cost (stmt_info
, ncopies
, dt
, NULL
);
3088 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3089 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3091 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3094 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3096 /* We vectorize all the stmts of the interleaving group when we
3097 reach the last stmt in the group. */
3098 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3099 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3108 strided_store
= false;
      /* VEC_NUM is the number of vect stmts to be created for this
         group.  */
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));

        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;

    group_size = vec_num = 1;
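      /* Clarifying note (added): for a plain store that is neither
         interleaved nor part of an SLP node, a single vector def is created
         per copy, so GROUP_SIZE and VEC_NUM both collapse to 1.  */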
3127 if (vect_print_dump_info (REPORT_DETAILS
))
3128 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3130 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3131 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3133 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
);
3134 gcc_assert (alignment_support_scheme
);
3136 /* In case the vectorization factor (VF) is bigger than the number
3137 of elements that we can fit in a vectype (nunits), we have to generate
3138 more than one vector stmt - i.e - we need to "unroll" the
3139 vector stmt by a factor VF/nunits. For more details see documentation in
3140 vect_get_vec_def_for_copy_stmt. */
3142 /* In case of interleaving (non-unit strided access):
3149 We create vectorized stores starting from base address (the access of the
3150 first stmt in the chain (S2 in the above example), when the last store stmt
3151 of the chain (S4) is reached:
3154 VS2: &base + vec_size*1 = vx0
3155 VS3: &base + vec_size*2 = vx1
3156 VS4: &base + vec_size*3 = vx3
3158 Then permutation statements are generated:
3160 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3161 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3164 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3165 (the order of the data-refs in the output of vect_permute_store_chain
3166 corresponds to the order of scalar stmts in the interleaving chain - see
3167 the documentation of vect_permute_store_chain()).
3169 In case of both multiple types and interleaving, above vector stores and
3170 permutation stmts are created for every copy. The result vector stmts are
3171 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3172 STMT_VINFO_RELATED_STMT for the next copies.
3175 prev_stmt_info
= NULL
;
3176 for (j
= 0; j
< ncopies
; j
++)
3185 /* Get vectorized arguments for SLP_NODE. */
3186 vect_get_slp_defs (slp_node
, &vec_oprnds
, NULL
, -1);
3188 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
3192 /* For interleaved stores we collect vectorized defs for all the
3193 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3194 used as an input to vect_permute_store_chain(), and OPRNDS as
3195 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3197 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3198 OPRNDS are of size 1. */
3199 next_stmt
= first_stmt
;
3200 for (i
= 0; i
< group_size
; i
++)
3202 /* Since gaps are not supported for interleaved stores,
3203 GROUP_SIZE is the exact number of stmts in the chain.
3204 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3205 there is no interleaving, GROUP_SIZE is 1, and only one
3206 iteration of the loop will be executed. */
3207 gcc_assert (next_stmt
3208 && gimple_assign_single_p (next_stmt
));
3209 op
= gimple_assign_rhs1 (next_stmt
);
3211 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
3213 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
3214 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
3215 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
3222 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, NULL
, NULL_TREE
,
3223 &dummy
, &ptr_incr
, false,
3225 gcc_assert (bb_vinfo
|| !inv_p
);
3229 /* For interleaved stores we created vectorized defs for all the
3230 defs stored in OPRNDS in the previous iteration (previous copy).
3231 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3232 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3234 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3235 OPRNDS are of size 1. */
3236 for (i
= 0; i
< group_size
; i
++)
3238 op
= VEC_index (tree
, oprnds
, i
);
3239 vect_is_simple_use (op
, loop_vinfo
, bb_vinfo
, &def_stmt
, &def
,
3241 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
3242 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
3243 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
3246 bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
, NULL_TREE
);
3251 result_chain
= VEC_alloc (tree
, heap
, group_size
);
3253 if (!vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
3258 next_stmt
= first_stmt
;
3259 for (i
= 0; i
< vec_num
; i
++)
3262 /* Bump the vector pointer. */
3263 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
3267 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
3268 else if (strided_store
)
3269 /* For strided stores vectorized defs are interleaved in
3270 vect_permute_store_chain(). */
3271 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
3273 if (aligned_access_p (first_dr
))
3274 data_ref
= build_fold_indirect_ref (dataref_ptr
);
3277 int mis
= DR_MISALIGNMENT (first_dr
);
3278 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
3279 tmis
= size_binop (MULT_EXPR
, tmis
, size_int (BITS_PER_UNIT
));
3280 data_ref
= build2 (MISALIGNED_INDIRECT_REF
, vectype
, dataref_ptr
, tmis
);
3283 /* If accesses through a pointer to vectype do not alias the original
3284 memory reference we have a problem. This should never happen. */
3285 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref
),
3286 get_alias_set (gimple_assign_lhs (stmt
))));
3288 /* Arguments are ready. Create the new vector stmt. */
3289 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
3290 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3291 mark_symbols_for_renaming (new_stmt
);
3297 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3299 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3301 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3302 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3308 VEC_free (tree
, heap
, dr_chain
);
3309 VEC_free (tree
, heap
, oprnds
);
3311 VEC_free (tree
, heap
, result_chain
);
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
3329 tree vec_dest
= NULL
;
3330 tree data_ref
= NULL
;
3331 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3332 stmt_vec_info prev_stmt_info
;
3333 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3334 struct loop
*loop
= NULL
;
3335 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
3336 bool nested_in_vect_loop
= false;
3337 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
3338 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3341 gimple new_stmt
= NULL
;
3343 enum dr_alignment_support alignment_support_scheme
;
3344 tree dataref_ptr
= NULL_TREE
;
3346 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3348 int i
, j
, group_size
;
3349 tree msq
= NULL_TREE
, lsq
;
3350 tree offset
= NULL_TREE
;
3351 tree realignment_token
= NULL_TREE
;
3353 VEC(tree
,heap
) *dr_chain
= NULL
;
3354 bool strided_load
= false;
3358 bool compute_in_loop
= false;
3359 struct loop
*at_loop
;
3361 bool slp
= (slp_node
!= NULL
);
3362 bool slp_perm
= false;
3363 enum tree_code code
;
3364 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3369 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3370 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3371 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3376 /* Multiple types in SLP are handled by creating the appropriate number of
3377 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3382 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3384 gcc_assert (ncopies
>= 1);
3386 /* FORNOW. This restriction should be relaxed. */
3387 if (nested_in_vect_loop
&& ncopies
> 1)
3389 if (vect_print_dump_info (REPORT_DETAILS
))
3390 fprintf (vect_dump
, "multiple types in nested loop.");
3394 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3397 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3400 /* Is vectorizable load? */
3401 if (!is_gimple_assign (stmt
))
3404 scalar_dest
= gimple_assign_lhs (stmt
);
3405 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
3408 code
= gimple_assign_rhs_code (stmt
);
3409 if (code
!= ARRAY_REF
3410 && code
!= INDIRECT_REF
3411 && code
!= COMPONENT_REF
3412 && code
!= IMAGPART_EXPR
3413 && code
!= REALPART_EXPR
)
3416 if (!STMT_VINFO_DATA_REF (stmt_info
))
3419 scalar_type
= TREE_TYPE (DR_REF (dr
));
3420 mode
= (int) TYPE_MODE (vectype
);
3422 /* FORNOW. In some cases can vectorize even if data-type not supported
3423 (e.g. - data copies). */
3424 if (optab_handler (mov_optab
, mode
)->insn_code
== CODE_FOR_nothing
)
3426 if (vect_print_dump_info (REPORT_DETAILS
))
3427 fprintf (vect_dump
, "Aligned load, but unsupported type.");
3431 /* The vector component type needs to be trivially convertible to the
3432 scalar lhs. This should always be the case. */
3433 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest
), TREE_TYPE (vectype
)))
3435 if (vect_print_dump_info (REPORT_DETAILS
))
3436 fprintf (vect_dump
, "??? operands of different types");
3440 /* Check if the load is a part of an interleaving chain. */
3441 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
3443 strided_load
= true;
3445 gcc_assert (! nested_in_vect_loop
);
3447 /* Check if interleaving is supported. */
3448 if (!vect_strided_load_supported (vectype
)
3449 && !PURE_SLP_STMT (stmt_info
) && !slp
)
3453 if (!vec_stmt
) /* transformation not required. */
3455 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
3456 vect_model_load_cost (stmt_info
, ncopies
, NULL
);
3460 if (vect_print_dump_info (REPORT_DETAILS
))
3461 fprintf (vect_dump
, "transform load.");
3467 first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3468 /* Check if the chain of loads is already vectorized. */
3469 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
3471 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3474 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3475 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3477 /* VEC_NUM is the number of vect stmts to be created for this group. */
3480 strided_load
= false;
3481 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3482 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
3486 vec_num
= group_size
;
3488 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
3494 group_size
= vec_num
= 1;
3497 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
);
3498 gcc_assert (alignment_support_scheme
);
3500 /* In case the vectorization factor (VF) is bigger than the number
3501 of elements that we can fit in a vectype (nunits), we have to generate
3502 more than one vector stmt - i.e - we need to "unroll" the
3503 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3504 from one copy of the vector stmt to the next, in the field
3505 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3506 stages to find the correct vector defs to be used when vectorizing
3507 stmts that use the defs of the current stmt. The example below illustrates
3508 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3509 4 vectorized stmts):
3511 before vectorization:
3512 RELATED_STMT VEC_STMT
3516 step 1: vectorize stmt S1:
3517 We first create the vector stmt VS1_0, and, as usual, record a
3518 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3519 Next, we create the vector stmt VS1_1, and record a pointer to
3520 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3521 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3523 RELATED_STMT VEC_STMT
3524 VS1_0: vx0 = memref0 VS1_1 -
3525 VS1_1: vx1 = memref1 VS1_2 -
3526 VS1_2: vx2 = memref2 VS1_3 -
3527 VS1_3: vx3 = memref3 - -
3528 S1: x = load - VS1_0
3531 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3532 information we recorded in RELATED_STMT field is used to vectorize
3535 /* In case of interleaving (non-unit strided access):
3542 Vectorized loads are created in the order of memory accesses
3543 starting from the access of the first stmt of the chain:
3546 VS2: vx1 = &base + vec_size*1
3547 VS3: vx3 = &base + vec_size*2
3548 VS4: vx4 = &base + vec_size*3
3550 Then permutation statements are generated:
3552 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3553 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3556 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3557 (the order of the data-refs in the output of vect_permute_load_chain
3558 corresponds to the order of scalar stmts in the interleaving chain - see
3559 the documentation of vect_permute_load_chain()).
3560 The generation of permutation stmts and recording them in
3561 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3563 In case of both multiple types and interleaving, the vector loads and
3564 permutation stmts above are created for every copy. The result vector stmts
3565 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3566 STMT_VINFO_RELATED_STMT for the next copies. */
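  /* Summary note (added for clarity): the load-generation switch further
     below distinguishes the alignment schemes returned by
     vect_supportable_dr_alignment -- dr_aligned, dr_unaligned_supported
     (a misaligned access the target handles directly), and
     dr_explicit_realign / dr_explicit_realign_optimized (two aligned loads
     combined with REALIGN_LOAD_EXPR, with the realignment token computed
     either per access or once before the loop).  */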
3568 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3569 on a target that supports unaligned accesses (dr_unaligned_supported)
3570 we generate the following code:
3574 p = p + indx * vectype_size;
3579 Otherwise, the data reference is potentially unaligned on a target that
3580 does not support unaligned accesses (dr_explicit_realign_optimized) -
3581 then generate the following code, in which the data in each iteration is
3582 obtained by two vector loads, one from the previous iteration, and one
3583 from the current iteration:
3585 msq_init = *(floor(p1))
3586 p2 = initial_addr + VS - 1;
3587 realignment_token = call target_builtin;
3590 p2 = p2 + indx * vectype_size
3592 vec_dest = realign_load (msq, lsq, realignment_token)
3597 /* If the misalignment remains the same throughout the execution of the
3598 loop, we can create the init_addr and permutation mask at the loop
3599 preheader. Otherwise, it needs to be created inside the loop.
3600 This can only occur when vectorizing memory accesses in the inner-loop
3601 nested within an outer-loop that is being vectorized. */
3603 if (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3604 && (TREE_INT_CST_LOW (DR_STEP (dr
))
3605 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
3607 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
3608 compute_in_loop
= true;
3611 if ((alignment_support_scheme
== dr_explicit_realign_optimized
3612 || alignment_support_scheme
== dr_explicit_realign
)
3613 && !compute_in_loop
)
3615 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
3616 alignment_support_scheme
, NULL_TREE
,
3618 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
3620 phi
= SSA_NAME_DEF_STMT (msq
);
3621 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
3627 prev_stmt_info
= NULL
;
3628 for (j
= 0; j
< ncopies
; j
++)
3630 /* 1. Create the vector pointer update chain. */
3632 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
,
3634 &dummy
, &ptr_incr
, false,
3638 bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
, NULL_TREE
);
3640 for (i
= 0; i
< vec_num
; i
++)
3643 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
3646 /* 2. Create the vector-load in the loop. */
3647 switch (alignment_support_scheme
)
3650 gcc_assert (aligned_access_p (first_dr
));
3651 data_ref
= build_fold_indirect_ref (dataref_ptr
);
3653 case dr_unaligned_supported
:
3655 int mis
= DR_MISALIGNMENT (first_dr
);
3656 tree tmis
= (mis
== -1 ? size_zero_node
: size_int (mis
));
3658 tmis
= size_binop (MULT_EXPR
, tmis
, size_int(BITS_PER_UNIT
));
3660 build2 (MISALIGNED_INDIRECT_REF
, vectype
, dataref_ptr
, tmis
);
3663 case dr_explicit_realign
:
3666 tree vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
3668 if (compute_in_loop
)
3669 msq
= vect_setup_realignment (first_stmt
, gsi
,
3671 dr_explicit_realign
,
3674 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
3675 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3676 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
3677 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3678 gimple_assign_set_lhs (new_stmt
, new_temp
);
3679 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
3680 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
3681 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3684 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
3685 TYPE_SIZE_UNIT (scalar_type
));
3686 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
3687 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, ptr
);
3690 case dr_explicit_realign_optimized
:
3691 data_ref
= build1 (ALIGN_INDIRECT_REF
, vectype
, dataref_ptr
);
3696 /* If accesses through a pointer to vectype do not alias the original
3697 memory reference we have a problem. This should never happen. */
3698 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref
),
3699 get_alias_set (gimple_assign_rhs1 (stmt
))));
3700 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3701 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
3702 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3703 gimple_assign_set_lhs (new_stmt
, new_temp
);
3704 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3705 mark_symbols_for_renaming (new_stmt
);
3707 /* 3. Handle explicit realignment if necessary/supported. Create in
3708 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3709 if (alignment_support_scheme
== dr_explicit_realign_optimized
3710 || alignment_support_scheme
== dr_explicit_realign
)
3714 lsq
= gimple_assign_lhs (new_stmt
);
3715 if (!realignment_token
)
3716 realignment_token
= dataref_ptr
;
3717 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3718 tmp
= build3 (REALIGN_LOAD_EXPR
, vectype
, msq
, lsq
,
3720 new_stmt
= gimple_build_assign (vec_dest
, tmp
);
3721 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3722 gimple_assign_set_lhs (new_stmt
, new_temp
);
3723 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3725 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
3728 if (i
== vec_num
- 1 && j
== ncopies
- 1)
3729 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
),
3735 /* 4. Handle invariant-load. */
3736 if (inv_p
&& !bb_vinfo
)
3738 gcc_assert (!strided_load
);
3739 gcc_assert (nested_in_vect_loop_p (loop
, stmt
));
3744 tree vec_inv
, bitpos
, bitsize
= TYPE_SIZE (scalar_type
);
              /* CHECKME: bitpos depends on endianness?  */
              bitpos = bitsize_zero_node;
3748 vec_inv
= build3 (BIT_FIELD_REF
, scalar_type
, new_temp
,
3751 vect_create_destination_var (scalar_dest
, NULL_TREE
);
3752 new_stmt
= gimple_build_assign (vec_dest
, vec_inv
);
3753 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3754 gimple_assign_set_lhs (new_stmt
, new_temp
);
3755 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3757 for (k
= nunits
- 1; k
>= 0; --k
)
3758 t
= tree_cons (NULL_TREE
, new_temp
, t
);
3759 /* FIXME: use build_constructor directly. */
3760 vec_inv
= build_constructor_from_list (vectype
, t
);
3761 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
3762 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
3765 gcc_unreachable (); /* FORNOW. */
3768 /* Collect vector loads and later create their permutation in
3769 vect_transform_strided_load (). */
3770 if (strided_load
|| slp_perm
)
3771 VEC_quick_push (tree
, dr_chain
, new_temp
);
3773 /* Store vector loads in the corresponding SLP_NODE. */
3774 if (slp
&& !slp_perm
)
3775 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3778 if (slp
&& !slp_perm
)
3783 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
3784 slp_node_instance
, false))
3786 VEC_free (tree
, heap
, dr_chain
);
3794 if (!vect_transform_strided_load (stmt
, dr_chain
, group_size
, gsi
))
3797 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3798 VEC_free (tree
, heap
, dr_chain
);
3799 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3804 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3806 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3807 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3813 VEC_free (tree
, heap
, dr_chain
);
/* Function vect_is_simple_cond.

   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vec_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3832 enum vect_def_type dt
;
3834 if (!COMPARISON_CLASS_P (cond
))
3837 lhs
= TREE_OPERAND (cond
, 0);
3838 rhs
= TREE_OPERAND (cond
, 1);
3840 if (TREE_CODE (lhs
) == SSA_NAME
)
3842 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
3843 if (!vect_is_simple_use (lhs
, loop_vinfo
, NULL
, &lhs_def_stmt
, &def
,
3847 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
3848 && TREE_CODE (lhs
) != FIXED_CST
)
3851 if (TREE_CODE (rhs
) == SSA_NAME
)
3853 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
3854 if (!vect_is_simple_use (rhs
, loop_vinfo
, NULL
, &rhs_def_stmt
, &def
,
3858 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
3859 && TREE_CODE (rhs
) != FIXED_CST
)
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index)
3882 tree scalar_dest
= NULL_TREE
;
3883 tree vec_dest
= NULL_TREE
;
3884 tree op
= NULL_TREE
;
3885 tree cond_expr
, then_clause
, else_clause
;
3886 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3887 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3888 tree vec_cond_lhs
, vec_cond_rhs
, vec_then_clause
, vec_else_clause
;
3889 tree vec_compare
, vec_cond_expr
;
3891 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3892 enum machine_mode vec_mode
;
3894 enum vect_def_type dt
;
3895 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3896 int ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3897 enum tree_code code
;
3899 /* FORNOW: unsupported in basic block SLP. */
3900 gcc_assert (loop_vinfo
);
3902 gcc_assert (ncopies
>= 1);
3904 return false; /* FORNOW */
3906 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
3909 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3910 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
3914 /* FORNOW: SLP not supported. */
3915 if (STMT_SLP_TYPE (stmt_info
))
3918 /* FORNOW: not yet supported. */
3919 if (STMT_VINFO_LIVE_P (stmt_info
))
3921 if (vect_print_dump_info (REPORT_DETAILS
))
3922 fprintf (vect_dump
, "value used after loop.");
3926 /* Is vectorizable conditional operation? */
3927 if (!is_gimple_assign (stmt
))
3930 code
= gimple_assign_rhs_code (stmt
);
3932 if (code
!= COND_EXPR
)
3935 gcc_assert (gimple_assign_single_p (stmt
));
3936 op
= gimple_assign_rhs1 (stmt
);
3937 cond_expr
= TREE_OPERAND (op
, 0);
3938 then_clause
= TREE_OPERAND (op
, 1);
3939 else_clause
= TREE_OPERAND (op
, 2);
3941 if (!vect_is_simple_cond (cond_expr
, loop_vinfo
))
3944 /* We do not handle two different vector types for the condition
3946 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr
, 0)),
3947 TREE_TYPE (vectype
)))
3950 if (TREE_CODE (then_clause
) == SSA_NAME
)
3952 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
3953 if (!vect_is_simple_use (then_clause
, loop_vinfo
, NULL
,
3954 &then_def_stmt
, &def
, &dt
))
3957 else if (TREE_CODE (then_clause
) != INTEGER_CST
3958 && TREE_CODE (then_clause
) != REAL_CST
3959 && TREE_CODE (then_clause
) != FIXED_CST
)
3962 if (TREE_CODE (else_clause
) == SSA_NAME
)
3964 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
3965 if (!vect_is_simple_use (else_clause
, loop_vinfo
, NULL
,
3966 &else_def_stmt
, &def
, &dt
))
3969 else if (TREE_CODE (else_clause
) != INTEGER_CST
3970 && TREE_CODE (else_clause
) != REAL_CST
3971 && TREE_CODE (else_clause
) != FIXED_CST
)
3975 vec_mode
= TYPE_MODE (vectype
);
3979 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
3980 return expand_vec_cond_expr_p (TREE_TYPE (op
), vec_mode
);
3986 scalar_dest
= gimple_assign_lhs (stmt
);
3987 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3989 /* Handle cond expr. */
3991 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0), stmt
, NULL
);
3993 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1), stmt
, NULL
);
3994 if (reduc_index
== 1)
3995 vec_then_clause
= reduc_def
;
3997 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
, stmt
, NULL
);
3998 if (reduc_index
== 2)
3999 vec_else_clause
= reduc_def
;
4001 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
, stmt
, NULL
);
4003 /* Arguments are ready. Create the new vector stmt. */
4004 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
4005 vec_cond_lhs
, vec_cond_rhs
);
4006 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
4007 vec_compare
, vec_then_clause
, vec_else_clause
);
4009 *vec_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
4010 new_temp
= make_ssa_name (vec_dest
, *vec_stmt
);
4011 gimple_assign_set_lhs (*vec_stmt
, new_temp
);
4012 vect_finish_stmt_generation (stmt
, *vec_stmt
, gsi
);
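
/* Illustrative example (editor's note, not part of the original sources):
   for a scalar statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   the transformation above builds, per vector copy, roughly

       vcmp = va < vb;                          (built by build2)
       vx   = VEC_COND_EXPR <vcmp, vc, vd>;     (built by build3)

   where va/vb/vc/vd are the vector defs returned by
   vect_get_vec_def_for_operand, or REDUC_DEF when REDUC_INDEX selects the
   then/else clause of a nested-cycle reduction.  */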
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: stmt has volatile operands");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "irrelevant.");

      return true;
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
      case vect_internal_def:
        break;

      case vect_reduction_def:
      case vect_nested_cycle:
         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                     || relevance == vect_used_in_outer_by_reduction
                     || relevance == vect_unused_in_scope));
         break;

      case vect_induction_def:
      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
      default:
        gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "get vectype for scalar type: ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "not SLPed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vectype: ");
          print_generic_expr (vect_dump, vectype, TDF_SLIM);
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
          || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
          || vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_store (stmt, NULL, NULL, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: relevant stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized: live stmt not ");
          fprintf (vect_dump, "supported: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  if (!PURE_SLP_STMT (stmt_info))
    {
      /* Groups of strided accesses whose size is not a power of 2 are not
         vectorizable yet using loop-vectorization.  Therefore, if this stmt
         feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
         loop-based vectorized), the loop cannot be vectorized.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                        DR_GROUP_FIRST_DR (stmt_info)))) == -1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "not vectorized: the size of group "
                                  "of strided accesses is not a power of 2");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}
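
/* Example (editor's note, added for illustration): a group of three
   interleaved accesses such as

       a[3*i] = ...;  a[3*i+1] = ...;  a[3*i+2] = ...;

   has DR_GROUP_SIZE == 3, for which exact_log2 returns -1, so the check
   above rejects pure loop-based vectorization of the group, whereas a
   group of 2 or 4 accesses passes this particular test.  */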
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *strided_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple orig_stmt_in_pattern;
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
      done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_promotion_vec_info_type:
      done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *strided_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_call (stmt, gsi, &vec_stmt);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "stmt not supported.");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                            vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    {
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
      if (orig_stmt_in_pattern)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
          /* STMT was inserted by the vectorizer to replace a computation
             idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original sequence
             that computed this idiom.  We need to record a pointer to
             VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
             details in the documentation of vect_pattern_recog.  */
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
            {
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
            }
        }
    }

  return is_store;
}
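
/* Illustration (editor's note, not in the original sources): for a nest like

       for (i = 0; i < N; i++)        <-- loop being vectorized
         {
           for (j = 0; j < M; j++)    <-- inner loop, left sequential
             s_1 = ...;
           ... = s_1;                 <-- use in the outer loop
         }

   the vectorized def of s_1 is recorded on the inner-loop exit phi above,
   so that outer-loop statements that use s_1 can find it later.  */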
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  DR_GROUP_FIRST_DR (res) = NULL;
  DR_GROUP_NEXT_DR (res) = NULL;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* If we'd build a vector type of elements whose mode precision doesn't
     match their types precision we'll get mismatched types on vector
     extracts via BIT_FIELD_REFs.  This effectively means we disable
     vectorization of bool and/or enum types in some languages.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    return NULL_TREE;

  /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
{
  return get_vectype_for_scalar_type (scalar_type);
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit int one vector). The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec, that support widen_mult using a sequence
     of {mult_even,mult_odd} generate the following vectors:
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow to change the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
                                                       == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in promotion sequence.  We try MAX_INTERM_CVT_STEPS
         to get to NARROW_VECTYPE, and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < 3; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

          if (!optab3 || !optab4
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                                                        == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
                                                        == CODE_FOR_nothing
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
                                                        == CODE_FOR_nothing
              || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
                                                        == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;

  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
       == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in promotion sequence.  We try MAX_INTERM_CVT_STEPS
         to get to NARROW_VECTYPE, and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < 3; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
                                                     TYPE_UNSIGNED (prev_type));
          interm_optab = optab_for_tree_code (c1, intermediate_type,
                                              optab_default);
          if (!interm_optab
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                                                        == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode1
                  = interm_optab->handlers[(int) intermediate_mode].insn_code)
                                                        == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;