/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfglayout.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
  mark_symbols_for_renaming (new_stmt);
}
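
/* For illustration, a sketch (all names hypothetical) of the gimple that
   write_vector_array and read_vector_array produce for an array of two
   vectors, as used by the load/store-lanes style of strided access:

       vect_array[0] = vect_x_lane0;      <-- write_vector_array, N == 0
       vect_array[1] = vect_x_lane1;      <-- write_vector_array, N == 1
       vect__3 = vect_array[0];           <-- read_vector_array, N == 0  */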
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  struct ptr_info_def *pi;
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  pi = get_ptr_info (ptr);
  pi->align = TYPE_ALIGN_UNIT (type);
  pi->misalign = 0;
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);

              if (vinfo_for_stmt (use_stmt)
                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                {
                  found = true;
                  break;
                }
            }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "last stmt in pattern. don't mark"
                                " relevant/live.");

          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
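
/* Note that the marking above is monotonic: LIVE_P can only be switched
   on, and RELEVANT only ever increases in the vect_relevant order (e.g. a
   stmt already marked vect_used_in_scope stays there when a later caller
   passes vect_used_by_reduction), so each stmt re-enters the worklist only
   a bounded number of times and the propagation terminates.  */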
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
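
/* For example, in a loop like (scalar source, SSA names hypothetical):

       s = 0;
       for (i = 0; i < n; i++)
         s_1 = s_0 + a[i];
       ... = s_1;                 <-- use of s_1 after the loop

   the stmt computing s_1 is detected as "live" through its use in the
   loop-closed exit phi, while a store such as b[i] = x_2 is detected as
   relevant through its vdef.  */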
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
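
/* For example, for the hypothetical stmts

       x_1 = a[i_2];           <-- i_2 only indexes the array
       a[i_2] = x_1;           <-- x_1 is the stored value, not an index

   this function returns false for USE == i_2 in both stmts, and true for
   USE == x_1 in the second stmt (the copy-stmt case checked above).  */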
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: phi relevant? ");
	      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "init: stmt relevant? ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "worklist: examine stmt: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
	          /* fall through */

	        default:
	          if (vect_print_dump_info (REPORT_DETAILS))
	            fprintf (vect_dump, "unsupported use of reduction.");

	          VEC_free (gimple, heap, worklist);
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist))
		    {
		      VEC_free (gimple, heap, worklist);
		      return false;
		    }
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
                {
		  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Get cost by calling cost target builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}
/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
  {
  case load_vec_info_type:
    return vect_get_stmt_cost (scalar_load);
  case store_vec_info_type:
    return vect_get_stmt_cost (scalar_store);
  case op_vec_info_type:
  case condition_vec_info_type:
  case assignment_vec_info_type:
  case reduc_vec_info_type:
  case induc_vec_info_type:
  case type_promotion_vec_info_type:
  case type_demotion_vec_info_type:
  case type_conversion_vec_info_type:
  case call_vec_info_type:
    return vect_get_stmt_cost (scalar_stmt);
  case undef_vec_info_type:
  default:
    gcc_unreachable ();
  }
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
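
/* E.g. for a strided store group of size 4, the first store of the group
   is charged with group_size == 4 (and thus absorbs the interleaving
   overhead computed by the callers below), while each of the remaining
   three stores contributes only 1, so the group overhead is counted
   exactly once.  */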
/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                             vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
		      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
	* vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
	 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
	  || slp_node),
	&inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                             vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}
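
/* As a concrete reading of the dr_explicit_realign_optimized case above:
   with ncopies == 2 the loop body is charged 2 * (vector_load +
   vector_stmt) for the loads plus realignments, while the address load,
   the priming load and the optional mask computation are charged outside
   the loop, and only once per group, via ADD_REALIGN_COST.  */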
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
		  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
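
/* For instance, vectorizing a constant operand 3 of a V4SI stmt may
   produce an init_stmt like the following in the loop preheader (the SSA
   name is hypothetical):

       vect_cst_.7 = { 3, 3, 3, 3 };

   and the returned def is the lhs vect_cst_.7.  */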
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type,
					 fold_convert (TREE_TYPE (vector_type),
						       op));
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

	/* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 VEC(tree,heap) **vec_oprnds0,
				 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
	{
	  *vec_oprnds1 = VEC_alloc (tree, heap, 1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
	}
    }
}
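
/* Typical use: when vectorizing a two-operand stmt such as z = x + y, the
   first vector copy obtains its operands with

       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                          slp_node);

   and every subsequent copy refreshes them with
   vect_get_vec_defs_for_stmt_copy, as described above.  */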
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
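
/* For example, for a call to a sqrt builtin with V2DF input and output
   types, a target whose builtin_vectorized_function hook provides a
   vectorized sqrt returns the decl of that variant here; for any function
   without a target-provided vector form the result is NULL_TREE and the
   caller rejects the stmt.  */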
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument types differ.");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
				 &def_stmt, &def, &dt[i], &opvectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "argument vector types differ.");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs);
	  else
	    VEC_truncate (tree, vargs, 0);

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt, NULL);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs = VEC_alloc (tree, heap, nargs * 2);
	  else
	    VEC_truncate (tree, vargs, 0);

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt, NULL);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      VEC_quick_push (tree, vargs, vec_oprnd0);
	      VEC_quick_push (tree, vargs, vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
					       vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
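
/* For a widening operation the caller invokes this helper twice: once
   with CODE1/DECL1 to produce the low half of the result and once with
   CODE2/DECL2 to produce the high half (typically a vec_unpack_lo /
   vec_unpack_hi style pair chosen by supportable_widening_operation).  */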
1827 /* Check if STMT performs a conversion operation, that can be vectorized.
1828 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1829 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1833 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
1834 gimple
*vec_stmt
, slp_tree slp_node
)
1839 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1840 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1841 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1842 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
1843 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
1847 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
1848 gimple new_stmt
= NULL
;
1849 stmt_vec_info prev_stmt_info
;
1852 tree vectype_out
, vectype_in
;
1856 enum { NARROW
, NONE
, WIDEN
} modifier
;
1858 VEC(tree
,heap
) *vec_oprnds0
= NULL
;
1860 VEC(tree
,heap
) *dummy
= NULL
;
1863 /* Is STMT a vectorizable conversion? */
1865 /* FORNOW: unsupported in basic block SLP. */
1866 gcc_assert (loop_vinfo
);
1868 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1871 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1874 if (!is_gimple_assign (stmt
))
1877 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
1880 code
= gimple_assign_rhs_code (stmt
);
1881 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
1884 /* Check types of lhs and rhs. */
1885 scalar_dest
= gimple_assign_lhs (stmt
);
1886 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1888 op0
= gimple_assign_rhs1 (stmt
);
1889 rhs_type
= TREE_TYPE (op0
);
1890 /* Check the operands of the operation. */
1891 if (!vect_is_simple_use_1 (op0
, loop_vinfo
, NULL
,
1892 &def_stmt
, &def
, &dt
[0], &vectype_in
))
1894 if (vect_print_dump_info (REPORT_DETAILS
))
1895 fprintf (vect_dump
, "use not simple.");
1898 /* If op0 is an external or constant defs use a vector type of
1899 the same size as the output vector type. */
1901 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1903 gcc_assert (vectype_in
);
1906 if (vect_print_dump_info (REPORT_DETAILS
))
1908 fprintf (vect_dump
, "no vectype for scalar type ");
1909 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
1916 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1917 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1918 if (nunits_in
== nunits_out
/ 2)
1920 else if (nunits_out
== nunits_in
)
1922 else if (nunits_out
== nunits_in
/ 2)
1927 if (modifier
== NARROW
)
1928 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1930 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1932 /* Multiple types in SLP are handled by creating the appropriate number of
1933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1935 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1938 /* Sanity check: make sure that at least one copy of the vectorized stmt
1939 needs to be generated. */
1940 gcc_assert (ncopies
>= 1);
1942 /* Supportable by target? */
1943 if ((modifier
== NONE
1944 && !targetm
.vectorize
.builtin_conversion (code
, vectype_out
, vectype_in
))
1945 || (modifier
== WIDEN
1946 && !supportable_widening_operation (code
, stmt
,
1947 vectype_out
, vectype_in
,
1950 &dummy_int
, &dummy
))
1951 || (modifier
== NARROW
1952 && !supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
1953 &code1
, &dummy_int
, &dummy
)))
1955 if (vect_print_dump_info (REPORT_DETAILS
))
1956 fprintf (vect_dump
, "conversion not supported by target.");
1960 if (modifier
!= NONE
)
1962 /* FORNOW: SLP not supported. */
1963 if (STMT_SLP_TYPE (stmt_info
))
1967 if (!vec_stmt
) /* transformation not required. */
1969 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
1974 if (vect_print_dump_info (REPORT_DETAILS
))
1975 fprintf (vect_dump
, "transform conversion.");
1978 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1980 if (modifier
== NONE
&& !slp_node
)
1981 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
1983 prev_stmt_info
= NULL
;
1987 for (j
= 0; j
< ncopies
; j
++)
1990 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
1992 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
1995 targetm
.vectorize
.builtin_conversion (code
,
1996 vectype_out
, vectype_in
);
1997 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
1999 /* Arguments are ready. create the new vector stmt. */
2000 new_stmt
= gimple_build_call (builtin_decl
, 1, vop0
);
2001 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2002 gimple_call_set_lhs (new_stmt
, new_temp
);
2003 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2005 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2009 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2011 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2012 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2017 /* In case the vectorization factor (VF) is bigger than the number
2018 of elements that we can fit in a vectype (nunits), we have to
2019 generate more than one vector stmt - i.e - we need to "unroll"
2020 the vector stmt by a factor VF/nunits. */
2021 for (j
= 0; j
< ncopies
; j
++)
2024 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2026 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2028 /* Generate first half of the widened result: */
2030 = vect_gen_widened_results_half (code1
, decl1
,
2031 vec_oprnd0
, vec_oprnd1
,
2032 unary_op
, vec_dest
, gsi
, stmt
);
2034 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2036 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2037 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2039 /* Generate second half of the widened result: */
2041 = vect_gen_widened_results_half (code2
, decl2
,
2042 vec_oprnd0
, vec_oprnd1
,
2043 unary_op
, vec_dest
, gsi
, stmt
);
2044 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2045 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}
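
/* Editor's illustration (a sketch, not from the original sources): the
   NONE/WIDEN/NARROW modifiers handled above correspond to how the input
   and output element counts relate.  With 128-bit vectors, a loop such as

       int a[N]; float b[N];
       for (i = 0; i < N; i++)
         b[i] = (float) a[i];

   is a NONE conversion (V4SI -> V4SF, one target builtin per copy), while
   int -> double is a WIDEN conversion (one V4SI input produces two V2DF
   results via the code1/code2 halves) and double -> int is a NARROW
   conversion (two V2DF inputs are combined into one V4SI result).  */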
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type conversion to/from bit-precision "
                 "unsupported.");
      return false;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /* Transform.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}
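
/* Editor's illustration (a sketch, not from the original sources): a
   plain copy such as

       int a[N], b[N];
       for (i = 0; i < N; i++)
         b[i] = a[i];

   is vectorized by vectorizable_assignment as a vector-to-vector copy;
   a same-sized conversion (e.g. int <-> unsigned int) is handled by
   wrapping the operand in a VIEW_CONVERT_EXPR first, as done in the
   transformation loop above.  */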
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
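
/* Editor's note (usage sketch, assuming callers elsewhere in the
   vectorizer): a caller only needs

       if (vect_supportable_shift (RSHIFT_EXPR, type))
         ...

   i.e. the predicate answers whether *some* optab (vector shifted by
   scalar, or vector shifted by vector) implements CODE for the vector
   type derived from SCALAR_TYPE; it does not report which form won.  */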
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;
  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision shifts not supported.");
      return false;
    }
  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;
  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "operand mode requires invariant argument.");
      return false;
    }
  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "vector/vector shift/rotate found.");
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
            }
        }
    }
  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }
  /* Transform.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = VEC_index (tree, vec_oprnds1, i);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  return true;
}
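
/* Editor's illustration (a sketch, not from the original sources): for

       for (i = 0; i < N; i++)
         a[i] = b[i] << 3;

   the shift amount is invariant, so the vector/scalar optab is preferred
   and op1 may stay a scalar if the insn pattern accepts a scalar
   operand 2, whereas

       for (i = 0; i < N; i++)
         a[i] = b[i] << c[i];

   has an internal def for the amount and must use the vector/vector
   optab.  */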
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;
  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
                 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bit-precision arithmetic not supported.");
      return false;
    }
  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[2]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_default);

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }
  /* Transform.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op || op_type == ternary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
      if (op_type == ternary_op)
        vec_oprnds2 = VEC_alloc (tree, heap, 1);
    }
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
          if (op_type == ternary_op)
            {
              vec_oprnds2 = VEC_alloc (tree, heap, 1);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_operand (op2, stmt, NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
              VEC_quick_push (tree, vec_oprnds2,
                              vect_get_vec_def_for_stmt_copy (dt[2],
                                                              vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
                                                    vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);
  if (vec_oprnds2)
    VEC_free (tree, heap, vec_oprnds2);

  return true;
}
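
/* Editor's illustration (a sketch, not from the original sources): the
   ternary path above covers three-operand codes such as FMA_EXPR, e.g.

       for (i = 0; i < N; i++)
         d[i] = a[i] * b[i] + c[i];

   when recognized as a fused multiply-add: vec_oprnds2 then carries the
   vectorized third operand and the statement is built with
   gimple_build_assign_with_ops3.  */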
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
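
/* Editor's note: each invocation above pushes exactly two vector defs,
   and recursion continues while MULTI_STEP_CVT is nonzero, so a call
   with MULTI_STEP_CVT = n collects 2 * (n + 1) defs.  The caller in
   vectorizable_type_demotion passes vect_pow2 (multi_step_cvt) - 1,
   which yields the 2 * vect_pow2 (multi_step_cvt) operands that its
   VEC_OPRNDS0 allocation reserves.  */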
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence. Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             code, prev_stmt_info);
    }
}
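
/* Editor's illustration (a sketch, not from the original sources): a
   two-step demotion such as int -> char on 128-bit vectors goes
   int -> short -> char.  Eight V4SI operands are combined pairwise into
   four V8HI vectors, and the recursive call combines those pairwise
   again into two V16QI results - each level halves the operand count,
   as the VEC_truncate above records.  */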
/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
                            gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  tree vectype_in;
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_oprnds0 = NULL;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
  tree last_oprnd, intermediate_type;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;
  /* Is STMT a vectorizable type-demotion operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Check the operands of the operation.  */
  op0 = gimple_assign_rhs1 (stmt);
  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0)))))
    return false;

  if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op0))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type demotion to/from bit-precision unsupported.");
      return false;
    }

  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* If op0 is an external def use a vector type with the
     same size as the output vector type if possible.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in >= nunits_out)
    return false;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
                                        &code1, &multi_step_cvt, &interm_types))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_demotion ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /* Transform.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
             ncopies);
  /* In case of multi-step demotion, we first generate demotion operations to
     the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_narrowing_operation, and store them in the correct order
     for future use in vect_create_vectorized_demotion_stmts().  */
  if (multi_step_cvt)
    vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  else
    vec_dsts = VEC_alloc (tree, heap, 1);

  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */
  last_oprnd = op0;
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (slp_node)
        vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
      else
        {
          VEC_free (tree, heap, vec_oprnds0);
          vec_oprnds0 = VEC_alloc (tree, heap,
                        (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
          vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                    vect_pow2 (multi_step_cvt) - 1);
        }

      /* Arguments are ready.  Create the new vector stmts.  */
      tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
      vect_create_vectorized_demotion_stmts (&vec_oprnds0,
                                             multi_step_cvt, stmt, tmp_vec_dsts,
                                             gsi, slp_node, code1,
                                             &prev_stmt_info);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, tmp_vec_dsts);
  VEC_free (tree, heap, interm_types);

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        int multi_step_cvt, gimple stmt,
                                        VEC (tree, heap) *vec_dsts,
                                        gimple_stmt_iterator *gsi,
                                        slp_tree slp_node, enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type,
                                        stmt_vec_info *prev_stmt_info)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
  gimple new_stmt1, new_stmt2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  VEC (tree, heap) *vec_tmp;

  vec_dest = VEC_pop (tree, vec_dsts);
  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);

  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      if (multi_step_cvt)
        {
          /* Store the results for the recursive call.  */
          VEC_quick_push (tree, vec_tmp, new_tmp1);
          VEC_quick_push (tree, vec_tmp, new_tmp2);
        }
      else
        {
          /* Last step of promotion sequence - store the results.  */
          if (slp_node)
            {
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
            }
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;

              *prev_stmt_info = vinfo_for_stmt (new_stmt1);
              STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
              *prev_stmt_info = vinfo_for_stmt (new_stmt2);
            }
        }
    }

  if (multi_step_cvt)
    {
      /* For multi-step promotion we call the function recursively for every
         stage.  We start from the input type, create promotion operations
         to the intermediate types, and then create promotions to the
         output type.  */
      *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
      vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                              multi_step_cvt - 1, stmt,
                                              vec_dsts, gsi, slp_node, code1,
                                              code2, decl2, decl2, op_type,
                                              prev_stmt_info);
    }

  VEC_free (tree, heap, vec_tmp);
}
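
/* Editor's illustration (a sketch, not from the original sources):
   promotion mirrors demotion - every input vector yields two outputs.
   Promoting V8HI to int produces a low and a high V4SI half via the
   CODE1/CODE2 (or DECL1/DECL2) pair, and for a two-step promotion such
   as char -> int the recursion widens those halves once more, doubling
   the number of vectors at every stage.  */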
/* Function vectorizable_type_promotion

   Check if STMT performs a binary or unary operation that involves
   type promotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
                             gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  int op_type;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  tree vectype_in;
  tree intermediate_type = NULL_TREE;
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  unsigned int k;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;
  /* Is STMT a vectorizable type-promotion operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Check the operands of the operation.  */
  op0 = gimple_assign_rhs1 (stmt);
  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
             && CONVERT_EXPR_CODE_P (code))))
    return false;

  if (INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op0))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op0)))))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type promotion to/from bit-precision "
                 "unsupported.");
      return false;
    }

  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  op_type = TREE_CODE_LENGTH (code);
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
        {
          /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
             OP1.  */
          if (CONSTANT_CLASS_P (op0))
            ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
                                       &def_stmt, &def, &dt[1], &vectype_in);
          else
            ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
                                     &dt[1]);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "use not simple.");
              return false;
            }
        }
    }

  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in <= nunits_out)
    return false;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                       &decl1, &decl2, &code1, &code2,
                                       &multi_step_cvt, &interm_types))
    return false;

  /* Binary widening operation can only be supported directly by the
     architecture.  */
  gcc_assert (!(multi_step_cvt && op_type == binary_op));

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_promotion ===");
      vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
      return true;
    }

  /* Transform.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
             ncopies);
  if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* Handle def.  */
  /* In case of multi-step promotion, we first generate promotion operations
     to the intermediate types, and then from that types to the final one.
     We store vector destination in VEC_DSTS in the correct order for
     recursive creation of promotion operations in
     vect_create_vectorized_promotion_stmts(). Vector destinations are created
     according to TYPES received from supportable_widening_operation().  */
  if (multi_step_cvt)
    vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  else
    vec_dsts = VEC_alloc (tree, heap, 1);

  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap,
                               (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
      if (op_type == binary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (slp_node)
            {
              if (code == WIDEN_LSHIFT_EXPR)
                {
                  vec_oprnd1 = op1;
                  /* Store vec_oprnd1 for every vector stmt to be created
                     for SLP_NODE.  We check during the analysis that all
                     the shift arguments are the same.  */
                  for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                    VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);

                  vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0,
                                     NULL, -1);
                }
              else
                vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
                                   &vec_oprnds1, -1);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                               NULL);
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                }
            }
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
          VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
          if (op_type == binary_op)
            {
              if (code == WIDEN_LSHIFT_EXPR)
                vec_oprnd1 = op1;
              else
                vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                             vec_oprnd1);
              VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
            }
        }

      /* Arguments are ready.  Create the new vector stmts.  */
      tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
      vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
                                              multi_step_cvt, stmt,
                                              tmp_vec_dsts,
                                              gsi, slp_node, code1, code2,
                                              decl1, decl2, op_type,
                                              &prev_stmt_info);
    }

  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, tmp_vec_dsts);
  VEC_free (tree, heap, interm_types);
  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  return true;
}
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool strided_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
  bool inv_p;
  VEC(tree,heap) *vec_oprnds = NULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "negative step for store.");
      return false;
    }
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_strided_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group. Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                                       &def, &dt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "use not simple.");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
      return true;
    }

  /* Transform.  */

  if (strided_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          strided_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d",ncopies);

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit strided access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
        VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
                                 NULL, -1);

              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next
                 copy.

                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  VEC_quick_push(tree, dr_chain, vec_oprnd);
                  VEC_quick_push(tree, oprnds, vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = VEC_index (tree, oprnds, i);
              vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
                                  &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              VEC_replace(tree, dr_chain, i, vec_oprnd);
              VEC_replace(tree, oprnds, i, vec_oprnd);
            }
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (aggr_type));
        }
3981 /* Combine all the vectors into an array. */
3982 vec_array
= create_vector_array (vectype
, vec_num
);
3983 for (i
= 0; i
< vec_num
; i
++)
3985 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
3986 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
3990 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3991 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
3992 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
3993 gimple_call_set_lhs (new_stmt
, data_ref
);
3994 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3995 mark_symbols_for_renaming (new_stmt
);
4002 result_chain
= VEC_alloc (tree
, heap
, group_size
);
4004 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4008 next_stmt
= first_stmt
;
4009 for (i
= 0; i
< vec_num
; i
++)
4011 struct ptr_info_def
*pi
;
4014 /* Bump the vector pointer. */
4015 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4019 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4020 else if (strided_store
)
4021 /* For strided stores vectorized defs are interleaved in
4022 vect_permute_store_chain(). */
4023 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
				 build_int_cst (reference_alias_ptr_type
						(DR_REF (first_dr)), 0));
	      pi = get_ptr_info (dataref_ptr);
	      pi->align = TYPE_ALIGN_UNIT (vectype);
	      if (aligned_access_p (first_dr))
		pi->misalign = 0;
	      else if (DR_MISALIGNMENT (first_dr) == -1)
		{
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  pi->align = TYPE_ALIGN_UNIT (elem_type);
		  pi->misalign = 0;
		}
	      else
		{
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  pi->misalign = DR_MISALIGNMENT (first_dr);
		}
	      /* Arguments are ready.  Create the new vector stmt.  */
	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      mark_symbols_for_renaming (new_stmt);

	      if (slp)
		continue;

	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	      if (!next_stmt)
		break;
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);
  if (vec_oprnds)
    VEC_free (tree, heap, vec_oprnds);

  return true;
}
/* Given a vector type VECTYPE returns a builtin DECL to be used
   for vector permutation and returns the mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  tree mask_element_type, mask_type, mask_vec = NULL;
  int i, nunits;

  if (!can_vec_perm_expr_p (vectype, NULL_TREE))
    return NULL;

  mask_element_type
    = lang_hooks.types.type_for_size
	(TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  for (i = 0; i < nunits; i++)
    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i),
			  mask_vec);
  mask_vec = build_vector (mask_type, mask_vec);

  if (!can_vec_perm_expr_p (vectype, mask_vec))
    return NULL;

  return mask_vec;
}
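
/* Illustration only (assuming four 32-bit elements per vector): because
   tree_cons above prepends each new element, pushing the constants
   0, 1, 2, 3 produces the mask {3, 2, 1, 0}, i.e. a VEC_PERM_EXPR that
   selects

     result[0] = x[3], result[1] = x[2], result[2] = x[1], result[3] = x[0],

   which is exactly element reversal.  */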
/* Given a vector variable X that was generated for the scalar LHS of
   STMT, generate instructions to reverse the vector elements of X,
   insert them at *GSI and return the permuted vector variable.  */

static tree
reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree mask_vec, perm_dest, data_ref;
  gimple perm_stmt;

  mask_vec = perm_mask_for_reverse (vectype);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
					     x, x, mask_vec);
  data_ref = make_ssa_name (perm_dest, perm_stmt);
  gimple_set_lhs (perm_stmt, data_ref);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
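
/* Illustration only (a hypothetical scalar kernel): the reversal above
   serves loads with a negative step, e.g.

     for (i = 0; i < n; i++)
       a[i] = b[n - 1 - i];

   The vectorizer loads a vector from the (suitably offset) descending
   address and then calls reverse_vec_elements to restore the element
   order the scalar code expects.  */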
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
  if (negative && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "multiple types with negative step.");
      return false;
    }
  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop);

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  if (vect_load_lanes_supported (vectype, group_size))
	    load_lanes_p = true;
	  else if (!vect_strided_load_supported (vectype, group_size))
	    return false;
	}
    }
  if (negative)
    {
      gcc_assert (!strided_load);
      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
      if (alignment_support_scheme != dr_aligned
	  && alignment_support_scheme != dr_unaligned_supported)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "negative step but alignment required.");
	  return false;
	}
      if (!perm_mask_for_reverse (vectype))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "negative step and reversing not supported.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);

  /** Transform.  **/
  if (strided_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
	  && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
	  && first_stmt != VEC_index (gimple,
				      SLP_TREE_SCALAR_STMTS (slp_node), 0))
	first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  strided_load = false;
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
	    slp_perm = true;
	}
      else
	vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit strided access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
     VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
     ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
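
  /* Illustration only (a hypothetical scalar source): a group with
     stride 2 such as

       for (i = 0; i < n; i++)
         {
           x = b[2 * i];
           y = b[2 * i + 1];
           a[i] = x + y;
         }

     is handled by loading two contiguous vectors starting at &b[2*i] and
     deinterleaving them with the extract-even/extract-odd permutations
     shown above, or with a single IFN_LOAD_LANES call (vld2 on ARM NEON,
     for example) when the target supports load-lanes.  */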
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
         p = p + indx * vectype_size;
         vec_dest = *(p);
         indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       p1 = initial_addr;
       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
         p2 = p2 + indx * vectype_size
         lsq = *(floor(p2))
         vec_dest = realign_load (msq, lsq, realignment_token)
         indx = indx + 1;
         msq = lsq;
       }  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
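
  /* Illustration only (made-up element counts): with 16-byte vectors of
     int and p misaligned by one element, the realignment scheme above
     computes each result from two aligned loads:

       msq = *(p & -16);           <-- aligned load, elements 0..3
       lsq = *((p + 15) & -16);    <-- aligned load, elements 4..7
       vec = realign_load (msq, lsq, token);   takes elements 1..4

     so no unaligned memory access is ever issued.  */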
  if (loop && nested_in_vect_loop_p (loop, stmt)
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = SSA_NAME_DEF_STMT (msq);
	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
	}
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type,
						at_loop, offset, &dummy, gsi,
						&ptr_incr, false, &inv_p);
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (strided_load || slp_perm)
	dr_chain = VEC_alloc (tree, heap, vec_num);
      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      VEC_quick_push (tree, dr_chain, new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_strided_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    struct ptr_info_def *pi;
		    data_ref
		      = build2 (MEM_REF, vectype, dataref_ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    pi = get_ptr_info (dataref_ptr);
		    pi->align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			pi->misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			pi->align = TYPE_ALIGN_UNIT (elem_type);
			pi->misalign = 0;
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			pi->misalign = DR_MISALIGNMENT (first_dr);
		      }
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;
		    tree vs_minus_1;

		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
				  build_int_cst
				    (TREE_TYPE (dataref_ptr),
				     -(HOST_WIDE_INT)
				       TYPE_ALIGN_UNIT (vectype)));
		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs_minus_1,
				       TYPE_SIZE_UNIT (elem_type));
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign_with_ops
				 (BIT_AND_EXPR, NULL_TREE, ptr,
				  build_int_cst
				    (TREE_TYPE (ptr),
				     -(HOST_WIDE_INT)
				       TYPE_ALIGN_UNIT (vectype)));
		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  new_stmt = gimple_build_assign_with_ops
			       (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
				build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT)
				     TYPE_ALIGN_UNIT (vectype)));
		  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
					    new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      mark_symbols_for_renaming (new_stmt);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest,
							  vectype);
		  new_stmt
		    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
						     vec_dest, msq, lsq,
						     realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme
		      == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}
	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  tree vec_inv;
		  gimple_stmt_iterator gsi2 = *gsi;

		  gcc_assert (!strided_load);
		  gsi_next (&gsi2);
		  vec_inv = build_vector_from_val (vectype, scalar_dest);
		  new_temp = vect_init_vector (stmt, vec_inv,
					       vectype, &gsi2);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      if (negative)
		{
		  new_temp = reverse_vec_elements (new_temp, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_strided_load ().  */
	      if (strided_load || slp_perm)
		VEC_quick_push (tree, dr_chain, new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				new_stmt);
	    }
	}
      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      VEC_free (tree, heap, dr_chain);
	      return false;
	    }
	}
      else
	{
	  if (strided_load)
	    {
	      if (!load_lanes_p)
		vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      if (dr_chain)
	VEC_free (tree, heap, dr_chain);
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
				 &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
				 &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
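
/* Illustration only (a hypothetical scalar kernel): vectorizable_condition
   below turns a conditional assignment such as

     for (i = 0; i < n; i++)
       a[i] = b[i] < c[i] ? d[i] : e[i];

   into a VEC_COND_EXPR whose first operand is the element-wise comparison
   vb < vc and whose other operands are the vectorized then/else values,
   typically expanded by the target as a compare-and-blend sequence.  */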
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert
   it at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  gimple new_stmt;
  int j;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "value used after loop.");
      return false;
    }
  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo, &comp_vectype))
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }
  /* Transform.  */

  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
	{
	  gimple gtemp;

	  vec_cond_lhs
	    = vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
					    stmt, NULL);
	  vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
			      NULL, &gtemp, &def, &dts[0]);
	  vec_cond_rhs
	    = vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					    stmt, NULL);
	  vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
			      NULL, &gtemp, &def, &dts[1]);
	  if (reduc_index == 1)
	    vec_then_clause = reduc_def;
	  else
	    {
	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
							      stmt, NULL);
	      vect_is_simple_use (then_clause, loop_vinfo,
				  NULL, &gtemp, &def, &dts[2]);
	    }
	  if (reduc_index == 2)
	    vec_else_clause = reduc_def;
	  else
	    {
	      vec_else_clause = vect_get_vec_def_for_operand (else_clause,
							      stmt, NULL);
	      vect_is_simple_use (else_clause, loop_vinfo,
				  NULL, &gtemp, &def, &dts[3]);
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_cond_lhs);
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_cond_rhs);
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_then_clause);
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      vec_compare = build2 (TREE_CODE (cond_expr), vectype,
			    vec_cond_lhs, vec_cond_rhs);
      vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
			      vec_compare, vec_then_clause, vec_else_clause);

      new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt, pattern_def_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	fprintf (vect_dump, "not vectorized: stmt has volatile operands");
      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statements need to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  */
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "==> examining pattern statement: ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }
	}
      else
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "irrelevant.");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "==> examining pattern statement: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
      && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	  || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
    {
      /* Analyze def stmt of STMT if it's a pattern stmt.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "==> examining pattern def statement: ");
	  print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
	}

      if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
	return false;
    }
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
      case vect_internal_def:
	break;

      case vect_reduction_def:
      case vect_nested_cycle:
	gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
		    || relevance == vect_used_in_outer_by_reduction
		    || relevance == vect_unused_in_scope));
	break;

      case vect_induction_def:
      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
      default:
	gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "get vectype for scalar type:  ");
	  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "not SLPed: unsupported data-type ");
	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
	    }
	  return false;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "vectype:  ");
	  print_generic_expr (vect_dump, vectype, TDF_SLIM);
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
	  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
	  || vectorizable_conversion (stmt, NULL, NULL, NULL)
	  || vectorizable_shift (stmt, NULL, NULL, NULL)
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
	      || vectorizable_type_demotion (stmt, NULL, NULL, node)
	      || vectorizable_shift (stmt, NULL, NULL, node)
	      || vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_store (stmt, NULL, NULL, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	{
	  fprintf (vect_dump, "not vectorized: relevant stmt not ");
	  fprintf (vect_dump, "supported: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	{
	  fprintf (vect_dump, "not vectorized: live stmt not ");
	  fprintf (vect_dump, "supported: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *strided_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
      done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_promotion_vec_info_type:
      done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *strided_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_call (stmt, gsi, &vec_stmt);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "stmt not supported.");
	  gcc_unreachable ();
	}
    }
  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;
  GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
      && !INTEGRAL_TYPE_P (scalar_type)
      && !POINTER_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
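
/* Illustration only: given a 16-byte V4SI vector type,
   get_same_sized_vectype (short_integer_type_node, v4si) returns the
   16-byte V8HI type - the same total size with twice the element count -
   which is how conversion code pairs narrow and wide vector types.  */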
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and
   operands that are defined by the current iteration of the loop.
   Unsupportable operands are those that are defined by a previous
   iteration of the loop (as is the case in reduction/induction
   computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      break;

    default:
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   operand.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
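
/* Illustration only (a hypothetical statement): for

     a[i] = x * c;      where c is defined before the loop

   vect_is_simple_use_1 classifies x as vect_internal_def and returns its
   vectype, while c comes back as vect_external_def with *VECTYPE set to
   NULL_TREE, leaving the invariant's vector type to the caller.  */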
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
     when vectorizing the operation, if available.  In this case,
     CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				tree *decl1, tree *decl2,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;

  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* The result of a vectorized widening operation usually requires two
     vectors (because the widened results do not fit in one vector).  The
     generated vector results would normally be expected to be generated
     in the same order as in the original scalar computation, i.e. if 8
     results are generated in each vector iteration, they are to be
     organized as follows:
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation
     is used in a reduction computation only, the order doesn't matter
     (because when vectorizing a reduction we change the order of the
     computation).  Some targets can take advantage of this and generate
     more efficient code.  For example, targets like Altivec, that support
     widen_mult using a sequence of {mult_even,mult_odd} generate the
     following vectors:
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow to change the order
     of the computation in the inner-loop during outer-loop
     vectorization.  */
  if (vect_loop
      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt)
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }
  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
	{
	  c1 = VEC_WIDEN_MULT_HI_EXPR;
	  c2 = VEC_WIDEN_MULT_LO_EXPR;
	}
      else
	{
	  c2 = VEC_WIDEN_MULT_HI_EXPR;
	  c1 = VEC_WIDEN_MULT_LO_EXPR;
	}
      break;

    case WIDEN_LSHIFT_EXPR:
      if (BYTES_BIG_ENDIAN)
	{
	  c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
	  c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
	}
      else
	{
	  c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
	  c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
	}
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
	{
	  c1 = VEC_UNPACK_HI_EXPR;
	  c2 = VEC_UNPACK_LO_EXPR;
	}
      else
	{
	  c2 = VEC_UNPACK_HI_EXPR;
	  c1 = VEC_UNPACK_LO_EXPR;
	}
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
	{
	  c1 = VEC_UNPACK_FLOAT_HI_EXPR;
	  c2 = VEC_UNPACK_FLOAT_LO_EXPR;
	}
      else
	{
	  c2 = VEC_UNPACK_FLOAT_HI_EXPR;
	  c1 = VEC_UNPACK_FLOAT_LO_EXPR;
	}
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
	return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
	 intermediate steps in promotion sequence.  We try
	 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
	 not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < 3; i++)
	{
	  intermediate_mode = insn_data[icode1].operand[0].mode;
	  intermediate_type
	    = lang_hooks.types.type_for_mode (intermediate_mode,
					      TYPE_UNSIGNED (prev_type));
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

	  if (!optab3 || !optab4
	      || ((icode1 = optab_handler (optab1, prev_mode))
		  == CODE_FOR_nothing)
	      || insn_data[icode1].operand[0].mode != intermediate_mode
	      || ((icode2 = optab_handler (optab2, prev_mode))
		  == CODE_FOR_nothing)
	      || insn_data[icode2].operand[0].mode != intermediate_mode
	      || ((icode1 = optab_handler (optab3, intermediate_mode))
		  == CODE_FOR_nothing)
	      || ((icode2 = optab_handler (optab4, intermediate_mode))
		  == CODE_FOR_nothing))
	    return false;

	  VEC_quick_push (tree, *interm_types, intermediate_type);
	  (*multi_step_cvt)++;

	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	    return true;

	  prev_type = intermediate_type;
	  prev_mode = intermediate_mode;
	}

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}
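
/* Illustration only (a hypothetical conversion): widening char to int
   needs one intermediate step,

     char  -> short   via VEC_UNPACK_HI/LO_EXPR
     short -> int     via VEC_UNPACK_HI/LO_EXPR

   so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type;
   each input vector of chars ultimately expands into four int vectors of
   the same total size.  */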
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;

  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
	 intermediate steps in the narrowing sequence.  We try
	 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
	 not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < 3; i++)
	{
	  intermediate_mode = insn_data[icode1].operand[0].mode;
	  intermediate_type
	    = lang_hooks.types.type_for_mode (intermediate_mode,
					      TYPE_UNSIGNED (prev_type));
	  interm_optab = optab_for_tree_code (c1, intermediate_type,
					      optab_default);
	  if (!interm_optab
	      || ((icode1 = optab_handler (optab1, prev_mode))
		  == CODE_FOR_nothing)
	      || insn_data[icode1].operand[0].mode != intermediate_mode
	      || ((icode1 = optab_handler (interm_optab, intermediate_mode))
		  == CODE_FOR_nothing))
	    return false;

	  VEC_quick_push (tree, *interm_types, intermediate_type);
	  (*multi_step_cvt)++;

	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	    return true;

	  prev_type = intermediate_type;
	  prev_mode = intermediate_mode;
	}

      return false;
    }

  *code1 = c1;
  return true;
}