1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
48 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
50 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
60 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
61 tree array
, unsigned HOST_WIDE_INT n
)
63 tree vect_type
, vect
, vect_name
, array_ref
;
66 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
67 vect_type
= TREE_TYPE (TREE_TYPE (array
));
68 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
69 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
70 build_int_cst (size_type_node
, n
),
71 NULL_TREE
, NULL_TREE
);
73 new_stmt
= gimple_build_assign (vect
, array_ref
);
74 vect_name
= make_ssa_name (vect
, new_stmt
);
75 gimple_assign_set_lhs (new_stmt
, vect_name
);
76 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
77 mark_symbols_for_renaming (new_stmt
);
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
87 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
88 tree array
, unsigned HOST_WIDE_INT n
)
93 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
94 build_int_cst (size_type_node
, n
),
95 NULL_TREE
, NULL_TREE
);
97 new_stmt
= gimple_build_assign (array_ref
, vect
);
98 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
99 mark_symbols_for_renaming (new_stmt
);
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
107 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
109 struct ptr_info_def
*pi
;
110 tree mem_ref
, alias_ptr_type
;
112 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
113 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
114 /* Arrays have the same alignment as their type. */
115 pi
= get_ptr_info (ptr
);
116 pi
->align
= TYPE_ALIGN_UNIT (type
);
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
128 vect_mark_relevant (VEC(gimple
,heap
) **worklist
, gimple stmt
,
129 enum vect_relevant relevant
, bool live_p
,
130 bool used_in_pattern
)
132 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
133 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
134 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
137 if (vect_print_dump_info (REPORT_DETAILS
))
138 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern, in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
147 if (!used_in_pattern
)
149 imm_use_iterator imm_iter
;
153 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
154 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
156 if (is_gimple_assign (stmt
))
157 lhs
= gimple_assign_lhs (stmt
);
159 lhs
= gimple_call_lhs (stmt
);
161 /* This use is out of pattern use, if LHS has other uses that are
162 pattern uses, we should mark the stmt itself, and not the pattern
164 if (TREE_CODE (lhs
) == SSA_NAME
)
165 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
167 if (is_gimple_debug (USE_STMT (use_p
)))
169 use_stmt
= USE_STMT (use_p
);
171 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
174 if (vinfo_for_stmt (use_stmt
)
175 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
185 /* This is the last stmt in a sequence that was detected as a
186 pattern that can potentially be vectorized. Don't mark the stmt
187 as relevant/live because it's not going to be vectorized.
188 Instead mark the pattern-stmt that replaces it. */
190 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
192 if (vect_print_dump_info (REPORT_DETAILS
))
193 fprintf (vect_dump
, "last stmt in pattern. don't mark"
195 stmt_info
= vinfo_for_stmt (pattern_stmt
);
196 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
197 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
198 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
203 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
204 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
205 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
207 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
208 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
210 if (vect_print_dump_info (REPORT_DETAILS
))
211 fprintf (vect_dump
, "already marked relevant/live.");
215 VEC_safe_push (gimple
, heap
, *worklist
, stmt
);
219 /* Function vect_stmt_relevant_p.
221 Return true if STMT in loop that is represented by LOOP_VINFO is
222 "relevant for vectorization".
224 A stmt is considered "relevant for vectorization" if:
225 - it has uses outside the loop.
226 - it has vdefs (it alters memory).
227 - control stmts in the loop (except for the exit condition).
229 CHECKME: what other side effects would the vectorizer allow? */
232 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
233 enum vect_relevant
*relevant
, bool *live_p
)
235 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
237 imm_use_iterator imm_iter
;
241 *relevant
= vect_unused_in_scope
;
244 /* cond stmt other than loop exit cond. */
245 if (is_ctrl_stmt (stmt
)
246 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
247 != loop_exit_ctrl_vec_info_type
)
248 *relevant
= vect_used_in_scope
;
250 /* changing memory. */
251 if (gimple_code (stmt
) != GIMPLE_PHI
)
252 if (gimple_vdef (stmt
))
254 if (vect_print_dump_info (REPORT_DETAILS
))
255 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
256 *relevant
= vect_used_in_scope
;
259 /* uses outside the loop. */
260 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
262 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
264 basic_block bb
= gimple_bb (USE_STMT (use_p
));
265 if (!flow_bb_inside_loop_p (loop
, bb
))
267 if (vect_print_dump_info (REPORT_DETAILS
))
268 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
270 if (is_gimple_debug (USE_STMT (use_p
)))
273 /* We expect all such uses to be in the loop exit phis
274 (because of loop closed form) */
275 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
276 gcc_assert (bb
== single_exit (loop
)->dest
);
283 return (*live_p
|| *relevant
);
287 /* Function exist_non_indexing_operands_for_use_p
289 USE is one of the uses attached to STMT. Check if USE is
290 used in STMT for anything other than indexing an array. */
293 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
296 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
298 /* USE corresponds to some operand in STMT. If there is no data
299 reference in STMT, then any operand that corresponds to USE
300 is not indexing an array. */
301 if (!STMT_VINFO_DATA_REF (stmt_info
))
304 /* STMT has a data_ref. FORNOW this means that its of one of
308 (This should have been verified in analyze_data_refs).
310 'var' in the second case corresponds to a def, not a use,
311 so USE cannot correspond to any operands that are not used
314 Therefore, all we need to check is if STMT falls into the
315 first case, and whether var corresponds to USE. */
317 if (!gimple_assign_copy_p (stmt
))
319 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
321 operand
= gimple_assign_rhs1 (stmt
);
322 if (TREE_CODE (operand
) != SSA_NAME
)
333 Function process_use.
336 - a USE in STMT in a loop represented by LOOP_VINFO
337 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
338 that defined USE. This is done by calling mark_relevant and passing it
339 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
340 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
344 Generally, LIVE_P and RELEVANT are used to define the liveness and
345 relevance info of the DEF_STMT of this USE:
346 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
347 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
349 - case 1: If USE is used only for address computations (e.g. array indexing),
350 which does not need to be directly vectorized, then the liveness/relevance
351 of the respective DEF_STMT is left unchanged.
352 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
353 skip DEF_STMT cause it had already been processed.
354 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
355 be modified accordingly.
357 Return true if everything is as expected. Return false otherwise. */
360 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
361 enum vect_relevant relevant
, VEC(gimple
,heap
) **worklist
,
364 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
365 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
366 stmt_vec_info dstmt_vinfo
;
367 basic_block bb
, def_bb
;
370 enum vect_def_type dt
;
372 /* case 1: we are only interested in uses that need to be vectorized. Uses
373 that are used for address computation are not considered relevant. */
374 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
377 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
380 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
384 if (!def_stmt
|| gimple_nop_p (def_stmt
))
387 def_bb
= gimple_bb (def_stmt
);
388 if (!flow_bb_inside_loop_p (loop
, def_bb
))
390 if (vect_print_dump_info (REPORT_DETAILS
))
391 fprintf (vect_dump
, "def_stmt is out of loop.");
395 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
396 DEF_STMT must have already been processed, because this should be the
397 only way that STMT, which is a reduction-phi, was put in the worklist,
398 as there should be no other uses for DEF_STMT in the loop. So we just
399 check that everything is as expected, and we are done. */
400 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
401 bb
= gimple_bb (stmt
);
402 if (gimple_code (stmt
) == GIMPLE_PHI
403 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
404 && gimple_code (def_stmt
) != GIMPLE_PHI
405 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
406 && bb
->loop_father
== def_bb
->loop_father
)
408 if (vect_print_dump_info (REPORT_DETAILS
))
409 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
410 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
411 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
412 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
413 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
414 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
418 /* case 3a: outer-loop stmt defining an inner-loop stmt:
419 outer-loop-header-bb:
425 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
427 if (vect_print_dump_info (REPORT_DETAILS
))
428 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
432 case vect_unused_in_scope
:
433 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
434 vect_used_in_scope
: vect_unused_in_scope
;
437 case vect_used_in_outer_by_reduction
:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
439 relevant
= vect_used_by_reduction
;
442 case vect_used_in_outer
:
443 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
444 relevant
= vect_used_in_scope
;
447 case vect_used_in_scope
:
455 /* case 3b: inner-loop stmt defining an outer-loop stmt:
456 outer-loop-header-bb:
460 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
462 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
464 if (vect_print_dump_info (REPORT_DETAILS
))
465 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
469 case vect_unused_in_scope
:
470 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
471 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
472 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
475 case vect_used_by_reduction
:
476 relevant
= vect_used_in_outer_by_reduction
;
479 case vect_used_in_scope
:
480 relevant
= vect_used_in_outer
;
488 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
489 is_pattern_stmt_p (stmt_vinfo
));
494 /* Function vect_mark_stmts_to_be_vectorized.
496 Not all stmts in the loop need to be vectorized. For example:
505 Stmt 1 and 3 do not need to be vectorized, because loop control and
506 addressing of vectorized data-refs are handled differently.
508 This pass detects such stmts. */
511 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
513 VEC(gimple
,heap
) *worklist
;
514 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
515 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
516 unsigned int nbbs
= loop
->num_nodes
;
517 gimple_stmt_iterator si
;
520 stmt_vec_info stmt_vinfo
;
524 enum vect_relevant relevant
, tmp_relevant
;
525 enum vect_def_type def_type
;
527 if (vect_print_dump_info (REPORT_DETAILS
))
528 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
530 worklist
= VEC_alloc (gimple
, heap
, 64);
532 /* 1. Init worklist. */
533 for (i
= 0; i
< nbbs
; i
++)
536 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
539 if (vect_print_dump_info (REPORT_DETAILS
))
541 fprintf (vect_dump
, "init: phi relevant? ");
542 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
545 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
546 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
548 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
550 stmt
= gsi_stmt (si
);
551 if (vect_print_dump_info (REPORT_DETAILS
))
553 fprintf (vect_dump
, "init: stmt relevant? ");
554 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
557 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
558 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
562 /* 2. Process_worklist */
563 while (VEC_length (gimple
, worklist
) > 0)
568 stmt
= VEC_pop (gimple
, worklist
);
569 if (vect_print_dump_info (REPORT_DETAILS
))
571 fprintf (vect_dump
, "worklist: examine stmt: ");
572 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
575 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
576 (DEF_STMT) as relevant/irrelevant and live/dead according to the
577 liveness and relevance properties of STMT. */
578 stmt_vinfo
= vinfo_for_stmt (stmt
);
579 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
580 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
582 /* Generally, the liveness and relevance properties of STMT are
583 propagated as is to the DEF_STMTs of its USEs:
584 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
585 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
587 One exception is when STMT has been identified as defining a reduction
588 variable; in this case we set the liveness/relevance as follows:
590 relevant = vect_used_by_reduction
591 This is because we distinguish between two kinds of relevant stmts -
592 those that are used by a reduction computation, and those that are
593 (also) used by a regular computation. This allows us later on to
594 identify stmts that are used solely by a reduction, and therefore the
595 order of the results that they produce does not have to be kept. */
597 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
598 tmp_relevant
= relevant
;
601 case vect_reduction_def
:
602 switch (tmp_relevant
)
604 case vect_unused_in_scope
:
605 relevant
= vect_used_by_reduction
;
608 case vect_used_by_reduction
:
609 if (gimple_code (stmt
) == GIMPLE_PHI
)
614 if (vect_print_dump_info (REPORT_DETAILS
))
615 fprintf (vect_dump
, "unsupported use of reduction.");
617 VEC_free (gimple
, heap
, worklist
);
624 case vect_nested_cycle
:
625 if (tmp_relevant
!= vect_unused_in_scope
626 && tmp_relevant
!= vect_used_in_outer_by_reduction
627 && tmp_relevant
!= vect_used_in_outer
)
629 if (vect_print_dump_info (REPORT_DETAILS
))
630 fprintf (vect_dump
, "unsupported use of nested cycle.");
632 VEC_free (gimple
, heap
, worklist
);
639 case vect_double_reduction_def
:
640 if (tmp_relevant
!= vect_unused_in_scope
641 && tmp_relevant
!= vect_used_by_reduction
)
643 if (vect_print_dump_info (REPORT_DETAILS
))
644 fprintf (vect_dump
, "unsupported use of double reduction.");
646 VEC_free (gimple
, heap
, worklist
);
657 if (is_pattern_stmt_p (stmt_vinfo
))
659 /* Pattern statements are not inserted into the code, so
660 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
661 have to scan the RHS or function arguments instead. */
662 if (is_gimple_assign (stmt
))
664 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
665 tree op
= gimple_assign_rhs1 (stmt
);
668 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
670 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
671 live_p
, relevant
, &worklist
, false)
672 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
673 live_p
, relevant
, &worklist
, false))
675 VEC_free (gimple
, heap
, worklist
);
680 for (; i
< gimple_num_ops (stmt
); i
++)
682 op
= gimple_op (stmt
, i
);
683 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
686 VEC_free (gimple
, heap
, worklist
);
691 else if (is_gimple_call (stmt
))
693 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
695 tree arg
= gimple_call_arg (stmt
, i
);
696 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
699 VEC_free (gimple
, heap
, worklist
);
706 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
708 tree op
= USE_FROM_PTR (use_p
);
709 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
712 VEC_free (gimple
, heap
, worklist
);
717 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
720 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
722 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
725 VEC_free (gimple
, heap
, worklist
);
729 } /* while worklist */
731 VEC_free (gimple
, heap
, worklist
);
736 /* Get cost by calling cost target builtin. */
739 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost
)
741 tree dummy_type
= NULL
;
744 return targetm
.vectorize
.builtin_vectorization_cost (type_of_cost
,
749 /* Get cost for STMT. */
752 cost_for_stmt (gimple stmt
)
754 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
756 switch (STMT_VINFO_TYPE (stmt_info
))
758 case load_vec_info_type
:
759 return vect_get_stmt_cost (scalar_load
);
760 case store_vec_info_type
:
761 return vect_get_stmt_cost (scalar_store
);
762 case op_vec_info_type
:
763 case condition_vec_info_type
:
764 case assignment_vec_info_type
:
765 case reduc_vec_info_type
:
766 case induc_vec_info_type
:
767 case type_promotion_vec_info_type
:
768 case type_demotion_vec_info_type
:
769 case type_conversion_vec_info_type
:
770 case call_vec_info_type
:
771 return vect_get_stmt_cost (scalar_stmt
);
772 case undef_vec_info_type
:
778 /* Function vect_model_simple_cost.
780 Models cost for simple operations, i.e. those that only emit ncopies of a
781 single op. Right now, this does not account for multiple insns that could
782 be generated for the single vector op. We will handle that shortly. */
785 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
786 enum vect_def_type
*dt
, slp_tree slp_node
)
789 int inside_cost
= 0, outside_cost
= 0;
791 /* The SLP costs were already calculated during SLP tree build. */
792 if (PURE_SLP_STMT (stmt_info
))
795 inside_cost
= ncopies
* vect_get_stmt_cost (vector_stmt
);
797 /* FORNOW: Assuming maximum 2 args per stmts. */
798 for (i
= 0; i
< 2; i
++)
800 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
801 outside_cost
+= vect_get_stmt_cost (vector_stmt
);
804 if (vect_print_dump_info (REPORT_COST
))
805 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
806 "outside_cost = %d .", inside_cost
, outside_cost
);
808 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
809 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
810 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
814 /* Model cost for type demotion and promotion operations. PWR is normally
815 zero for single-step promotions and demotions. It will be one if
816 two-step promotion/demotion is required, and so on. Each additional
817 step doubles the number of instructions required. */
820 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
821 enum vect_def_type
*dt
, int pwr
)
824 int inside_cost
= 0, outside_cost
= 0, single_stmt_cost
;
826 /* The SLP costs were already calculated during SLP tree build. */
827 if (PURE_SLP_STMT (stmt_info
))
830 single_stmt_cost
= vect_get_stmt_cost (vec_promote_demote
);
831 for (i
= 0; i
< pwr
+ 1; i
++)
833 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
835 inside_cost
+= vect_pow2 (tmp
) * single_stmt_cost
;
838 /* FORNOW: Assuming maximum 2 args per stmts. */
839 for (i
= 0; i
< 2; i
++)
841 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
842 outside_cost
+= vect_get_stmt_cost (vector_stmt
);
845 if (vect_print_dump_info (REPORT_COST
))
846 fprintf (vect_dump
, "vect_model_promotion_demotion_cost: inside_cost = %d, "
847 "outside_cost = %d .", inside_cost
, outside_cost
);
849 /* Set the costs in STMT_INFO. */
850 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, NULL
, inside_cost
);
851 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, NULL
, outside_cost
);
854 /* Function vect_cost_strided_group_size
856 For strided load or store, return the group_size only if it is the first
857 load or store of a group, else return 1. This ensures that group size is
858 only returned once per group. */
861 vect_cost_strided_group_size (stmt_vec_info stmt_info
)
863 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
865 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
866 return GROUP_SIZE (stmt_info
);
872 /* Function vect_model_store_cost
874 Models cost for stores. In the case of strided accesses, one access
875 has the overhead of the strided access attributed to it. */
878 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
879 bool store_lanes_p
, enum vect_def_type dt
,
883 unsigned int inside_cost
= 0, outside_cost
= 0;
884 struct data_reference
*first_dr
;
887 /* The SLP costs were already calculated during SLP tree build. */
888 if (PURE_SLP_STMT (stmt_info
))
891 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
892 outside_cost
= vect_get_stmt_cost (scalar_to_vec
);
894 /* Strided access? */
895 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
899 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
904 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
905 group_size
= vect_cost_strided_group_size (stmt_info
);
908 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
910 /* Not a strided access. */
914 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
917 /* We assume that the cost of a single store-lanes instruction is
918 equivalent to the cost of GROUP_SIZE separate stores. If a strided
919 access is instead being provided by a permute-and-store operation,
920 include the cost of the permutes. */
921 if (!store_lanes_p
&& group_size
> 1)
923 /* Uses a high and low interleave operation for each needed permute. */
924 inside_cost
= ncopies
* exact_log2(group_size
) * group_size
925 * vect_get_stmt_cost (vec_perm
);
927 if (vect_print_dump_info (REPORT_COST
))
928 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
932 /* Costs of the stores. */
933 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
);
935 if (vect_print_dump_info (REPORT_COST
))
936 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
937 "outside_cost = %d .", inside_cost
, outside_cost
);
939 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
940 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
941 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
945 /* Calculate cost of DR's memory access. */
947 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
948 unsigned int *inside_cost
)
950 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
952 switch (alignment_support_scheme
)
956 *inside_cost
+= ncopies
* vect_get_stmt_cost (vector_store
);
958 if (vect_print_dump_info (REPORT_COST
))
959 fprintf (vect_dump
, "vect_model_store_cost: aligned.");
964 case dr_unaligned_supported
:
966 gimple stmt
= DR_STMT (dr
);
967 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
968 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
970 /* Here, we assign an additional cost for the unaligned store. */
971 *inside_cost
+= ncopies
972 * targetm
.vectorize
.builtin_vectorization_cost (unaligned_store
,
973 vectype
, DR_MISALIGNMENT (dr
));
975 if (vect_print_dump_info (REPORT_COST
))
976 fprintf (vect_dump
, "vect_model_store_cost: unaligned supported by "
988 /* Function vect_model_load_cost
990 Models cost for loads. In the case of strided accesses, the last access
991 has the overhead of the strided access attributed to it. Since unaligned
992 accesses are supported for loads, we also account for the costs of the
993 access scheme chosen. */
996 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
, bool load_lanes_p
,
1001 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1002 unsigned int inside_cost
= 0, outside_cost
= 0;
1004 /* The SLP costs were already calculated during SLP tree build. */
1005 if (PURE_SLP_STMT (stmt_info
))
1008 /* Strided accesses? */
1009 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1010 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1012 group_size
= vect_cost_strided_group_size (stmt_info
);
1013 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1015 /* Not a strided access. */
1022 /* We assume that the cost of a single load-lanes instruction is
1023 equivalent to the cost of GROUP_SIZE separate loads. If a strided
1024 access is instead being provided by a load-and-permute operation,
1025 include the cost of the permutes. */
1026 if (!load_lanes_p
&& group_size
> 1)
1028 /* Uses an even and odd extract operations for each needed permute. */
1029 inside_cost
= ncopies
* exact_log2(group_size
) * group_size
1030 * vect_get_stmt_cost (vec_perm
);
1032 if (vect_print_dump_info (REPORT_COST
))
1033 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
1037 /* The loads themselves. */
1038 vect_get_load_cost (first_dr
, ncopies
,
1039 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info
)) || group_size
> 1
1041 &inside_cost
, &outside_cost
);
1043 if (vect_print_dump_info (REPORT_COST
))
1044 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
1045 "outside_cost = %d .", inside_cost
, outside_cost
);
1047 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1048 stmt_vinfo_set_inside_of_loop_cost (stmt_info
, slp_node
, inside_cost
);
1049 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
1053 /* Calculate cost of DR's memory access. */
1055 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1056 bool add_realign_cost
, unsigned int *inside_cost
,
1057 unsigned int *outside_cost
)
1059 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1061 switch (alignment_support_scheme
)
1065 *inside_cost
+= ncopies
* vect_get_stmt_cost (vector_load
);
1067 if (vect_print_dump_info (REPORT_COST
))
1068 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
1072 case dr_unaligned_supported
:
1074 gimple stmt
= DR_STMT (dr
);
1075 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1076 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1078 /* Here, we assign an additional cost for the unaligned load. */
1079 *inside_cost
+= ncopies
1080 * targetm
.vectorize
.builtin_vectorization_cost (unaligned_load
,
1081 vectype
, DR_MISALIGNMENT (dr
));
1082 if (vect_print_dump_info (REPORT_COST
))
1083 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
1088 case dr_explicit_realign
:
1090 *inside_cost
+= ncopies
* (2 * vect_get_stmt_cost (vector_load
)
1091 + vect_get_stmt_cost (vec_perm
));
1093 /* FIXME: If the misalignment remains fixed across the iterations of
1094 the containing loop, the following cost should be added to the
1096 if (targetm
.vectorize
.builtin_mask_for_load
)
1097 *inside_cost
+= vect_get_stmt_cost (vector_stmt
);
1099 if (vect_print_dump_info (REPORT_COST
))
1100 fprintf (vect_dump
, "vect_model_load_cost: explicit realign");
1104 case dr_explicit_realign_optimized
:
1106 if (vect_print_dump_info (REPORT_COST
))
1107 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
1110 /* Unaligned software pipeline has a load of an address, an initial
1111 load, and possibly a mask operation to "prime" the loop. However,
1112 if this is an access in a group of loads, which provide strided
1113 access, then the above cost should only be considered for one
1114 access in the group. Inside the loop, there is a load op
1115 and a realignment op. */
1117 if (add_realign_cost
)
1119 *outside_cost
= 2 * vect_get_stmt_cost (vector_stmt
);
1120 if (targetm
.vectorize
.builtin_mask_for_load
)
1121 *outside_cost
+= vect_get_stmt_cost (vector_stmt
);
1124 *inside_cost
+= ncopies
* (vect_get_stmt_cost (vector_load
)
1125 + vect_get_stmt_cost (vec_perm
));
1127 if (vect_print_dump_info (REPORT_COST
))
1129 "vect_model_load_cost: explicit realign optimized");
1140 /* Function vect_init_vector.
1142 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1143 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1144 is not NULL. Otherwise, place the initialization at the loop preheader.
1145 Return the DEF of INIT_STMT.
1146 It will be used in the vectorization of STMT. */
1149 vect_init_vector (gimple stmt
, tree vector_var
, tree vector_type
,
1150 gimple_stmt_iterator
*gsi
)
1152 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1160 new_var
= vect_get_new_vect_var (vector_type
, vect_simple_var
, "cst_");
1161 add_referenced_var (new_var
);
1162 init_stmt
= gimple_build_assign (new_var
, vector_var
);
1163 new_temp
= make_ssa_name (new_var
, init_stmt
);
1164 gimple_assign_set_lhs (init_stmt
, new_temp
);
1167 vect_finish_stmt_generation (stmt
, init_stmt
, gsi
);
1170 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1174 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1176 if (nested_in_vect_loop_p (loop
, stmt
))
1179 pe
= loop_preheader_edge (loop
);
1180 new_bb
= gsi_insert_on_edge_immediate (pe
, init_stmt
);
1181 gcc_assert (!new_bb
);
1185 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1187 gimple_stmt_iterator gsi_bb_start
;
1189 gcc_assert (bb_vinfo
);
1190 bb
= BB_VINFO_BB (bb_vinfo
);
1191 gsi_bb_start
= gsi_after_labels (bb
);
1192 gsi_insert_before (&gsi_bb_start
, init_stmt
, GSI_SAME_STMT
);
1196 if (vect_print_dump_info (REPORT_DETAILS
))
1198 fprintf (vect_dump
, "created new init_stmt: ");
1199 print_gimple_stmt (vect_dump
, init_stmt
, 0, TDF_SLIM
);
1202 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1207 /* Function vect_get_vec_def_for_operand.
1209 OP is an operand in STMT. This function returns a (vector) def that will be
1210 used in the vectorized stmt for STMT.
1212 In the case that OP is an SSA_NAME which is defined in the loop, then
1213 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1215 In case OP is an invariant or constant, a new stmt that creates a vector def
1216 needs to be introduced. */
1219 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1224 stmt_vec_info def_stmt_info
= NULL
;
1225 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1226 unsigned int nunits
;
1227 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1233 enum vect_def_type dt
;
1237 if (vect_print_dump_info (REPORT_DETAILS
))
1239 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
1240 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
1243 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1244 &def_stmt
, &def
, &dt
);
1245 gcc_assert (is_simple_use
);
1246 if (vect_print_dump_info (REPORT_DETAILS
))
1250 fprintf (vect_dump
, "def = ");
1251 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
1255 fprintf (vect_dump
, " def_stmt = ");
1256 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
1262 /* Case 1: operand is a constant. */
1263 case vect_constant_def
:
1265 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1266 gcc_assert (vector_type
);
1267 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1272 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1273 if (vect_print_dump_info (REPORT_DETAILS
))
1274 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
1276 vec_cst
= build_vector_from_val (vector_type
,
1277 fold_convert (TREE_TYPE (vector_type
),
1279 return vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
1282 /* Case 2: operand is defined outside the loop - loop invariant. */
1283 case vect_external_def
:
1285 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1286 gcc_assert (vector_type
);
1287 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1292 /* Create 'vec_inv = {inv,inv,..,inv}' */
1293 if (vect_print_dump_info (REPORT_DETAILS
))
1294 fprintf (vect_dump
, "Create vector_inv.");
1296 for (i
= nunits
- 1; i
>= 0; --i
)
1298 t
= tree_cons (NULL_TREE
, def
, t
);
1301 /* FIXME: use build_constructor directly. */
1302 vec_inv
= build_constructor_from_list (vector_type
, t
);
1303 return vect_init_vector (stmt
, vec_inv
, vector_type
, NULL
);
1306 /* Case 3: operand is defined inside the loop. */
1307 case vect_internal_def
:
1310 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1312 /* Get the def from the vectorized stmt. */
1313 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1315 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1316 /* Get vectorized pattern statement. */
1318 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1319 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1320 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1321 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1322 gcc_assert (vec_stmt
);
1323 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1324 vec_oprnd
= PHI_RESULT (vec_stmt
);
1325 else if (is_gimple_call (vec_stmt
))
1326 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1328 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1332 /* Case 4: operand is defined by a loop header phi - reduction */
1333 case vect_reduction_def
:
1334 case vect_double_reduction_def
:
1335 case vect_nested_cycle
:
1339 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1340 loop
= (gimple_bb (def_stmt
))->loop_father
;
1342 /* Get the def before the loop */
1343 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1344 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1347 /* Case 5: operand is defined by loop-header phi - induction. */
1348 case vect_induction_def
:
1350 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1352 /* Get the def from the vectorized stmt. */
1353 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1354 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1355 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1356 vec_oprnd
= PHI_RESULT (vec_stmt
);
1358 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1368 /* Function vect_get_vec_def_for_stmt_copy
1370 Return a vector-def for an operand. This function is used when the
1371 vectorized stmt to be created (by the caller to this function) is a "copy"
1372 created in case the vectorized result cannot fit in one vector, and several
1373 copies of the vector-stmt are required. In this case the vector-def is
1374 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1375 of the stmt that defines VEC_OPRND.
1376 DT is the type of the vector def VEC_OPRND.
1379 In case the vectorization factor (VF) is bigger than the number
1380 of elements that can fit in a vectype (nunits), we have to generate
1381 more than one vector stmt to vectorize the scalar stmt. This situation
1382 arises when there are multiple data-types operated upon in the loop; the
1383 smallest data-type determines the VF, and as a result, when vectorizing
1384 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1385 vector stmt (each computing a vector of 'nunits' results, and together
1386 computing 'VF' results in each iteration). This function is called when
1387 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1388 which VF=16 and nunits=4, so the number of copies required is 4):
1390 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1392 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1393 VS1.1: vx.1 = memref1 VS1.2
1394 VS1.2: vx.2 = memref2 VS1.3
1395 VS1.3: vx.3 = memref3
1397 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1398 VSnew.1: vz1 = vx.1 + ... VSnew.2
1399 VSnew.2: vz2 = vx.2 + ... VSnew.3
1400 VSnew.3: vz3 = vx.3 + ...
1402 The vectorization of S1 is explained in vectorizable_load.
1403 The vectorization of S2:
1404 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1405 the function 'vect_get_vec_def_for_operand' is called to
1406 get the relevant vector-def for each operand of S2. For operand x it
1407 returns the vector-def 'vx.0'.
1409 To create the remaining copies of the vector-stmt (VSnew.j), this
1410 function is called to get the relevant vector-def for each operand. It is
1411 obtained from the respective VS1.j stmt, which is recorded in the
1412 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1414 For example, to obtain the vector-def 'vx.1' in order to create the
1415 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1416 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1417 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1418 and return its def ('vx.1').
1419 Overall, to create the above sequence this function will be called 3 times:
1420 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1421 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1422 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1425 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1427 gimple vec_stmt_for_operand
;
1428 stmt_vec_info def_stmt_info
;
1430 /* Do nothing; can reuse same def. */
1431 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1434 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1435 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1436 gcc_assert (def_stmt_info
);
1437 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1438 gcc_assert (vec_stmt_for_operand
);
1439 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1440 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1441 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1443 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1448 /* Get vectorized definitions for the operands to create a copy of an original
1449 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1452 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1453 VEC(tree
,heap
) **vec_oprnds0
,
1454 VEC(tree
,heap
) **vec_oprnds1
)
1456 tree vec_oprnd
= VEC_pop (tree
, *vec_oprnds0
);
1458 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1459 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1461 if (vec_oprnds1
&& *vec_oprnds1
)
1463 vec_oprnd
= VEC_pop (tree
, *vec_oprnds1
);
1464 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1465 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1470 /* Get vectorized definitions for OP0 and OP1.
1471 REDUC_INDEX is the index of reduction operand in case of reduction,
1472 and -1 otherwise. */
1475 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1476 VEC (tree
, heap
) **vec_oprnds0
,
1477 VEC (tree
, heap
) **vec_oprnds1
,
1478 slp_tree slp_node
, int reduc_index
)
1482 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1483 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, nops
);
1484 VEC (slp_void_p
, heap
) *vec_defs
= VEC_alloc (slp_void_p
, heap
, nops
);
1486 VEC_quick_push (tree
, ops
, op0
);
1488 VEC_quick_push (tree
, ops
, op1
);
1490 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1492 *vec_oprnds0
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1494 *vec_oprnds1
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 1);
1496 VEC_free (tree
, heap
, ops
);
1497 VEC_free (slp_void_p
, heap
, vec_defs
);
1503 *vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
1504 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1505 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1509 *vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
1510 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1511 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1517 /* Function vect_finish_stmt_generation.
1519 Insert a new stmt. */
1522 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1523 gimple_stmt_iterator
*gsi
)
1525 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1526 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1527 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1529 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1531 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1533 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1536 if (vect_print_dump_info (REPORT_DETAILS
))
1538 fprintf (vect_dump
, "add new stmt: ");
1539 print_gimple_stmt (vect_dump
, vec_stmt
, 0, TDF_SLIM
);
1542 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1545 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1546 a function declaration if the target has a vectorized version
1547 of the function, or NULL_TREE if the function cannot be vectorized. */
1550 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1552 tree fndecl
= gimple_call_fndecl (call
);
1554 /* We only handle functions that do not read or clobber memory -- i.e.
1555 const or novops ones. */
1556 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1560 || TREE_CODE (fndecl
) != FUNCTION_DECL
1561 || !DECL_BUILT_IN (fndecl
))
1564 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1568 /* Function vectorizable_call.
1570 Check if STMT performs a function call that can be vectorized.
1571 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1572 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1573 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1576 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1582 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1583 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1584 tree vectype_out
, vectype_in
;
1587 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1588 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1589 tree fndecl
, new_temp
, def
, rhs_type
;
1591 enum vect_def_type dt
[3]
1592 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1593 gimple new_stmt
= NULL
;
1595 VEC(tree
, heap
) *vargs
= NULL
;
1596 enum { NARROW
, NONE
, WIDEN
} modifier
;
1600 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1603 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1606 /* Is STMT a vectorizable call? */
1607 if (!is_gimple_call (stmt
))
1610 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1613 if (stmt_can_throw_internal (stmt
))
1616 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1618 /* Process function arguments. */
1619 rhs_type
= NULL_TREE
;
1620 vectype_in
= NULL_TREE
;
1621 nargs
= gimple_call_num_args (stmt
);
1623 /* Bail out if the function has more than three arguments, we do not have
1624 interesting builtin functions to vectorize with more than two arguments
1625 except for fma. No arguments is also not good. */
1626 if (nargs
== 0 || nargs
> 3)
1629 for (i
= 0; i
< nargs
; i
++)
1633 op
= gimple_call_arg (stmt
, i
);
1635 /* We can only handle calls with arguments of the same type. */
1637 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1639 if (vect_print_dump_info (REPORT_DETAILS
))
1640 fprintf (vect_dump
, "argument types differ.");
1644 rhs_type
= TREE_TYPE (op
);
1646 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1647 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1649 if (vect_print_dump_info (REPORT_DETAILS
))
1650 fprintf (vect_dump
, "use not simple.");
1655 vectype_in
= opvectype
;
1657 && opvectype
!= vectype_in
)
1659 if (vect_print_dump_info (REPORT_DETAILS
))
1660 fprintf (vect_dump
, "argument vector types differ.");
1664 /* If all arguments are external or constant defs use a vector type with
1665 the same size as the output vector type. */
1667 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1669 gcc_assert (vectype_in
);
1672 if (vect_print_dump_info (REPORT_DETAILS
))
1674 fprintf (vect_dump
, "no vectype for scalar type ");
1675 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
1682 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1683 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1684 if (nunits_in
== nunits_out
/ 2)
1686 else if (nunits_out
== nunits_in
)
1688 else if (nunits_out
== nunits_in
/ 2)
1693 /* For now, we only vectorize functions if a target specific builtin
1694 is available. TODO -- in some cases, it might be profitable to
1695 insert the calls for pieces of the vector, in order to be able
1696 to vectorize other operations in the loop. */
1697 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1698 if (fndecl
== NULL_TREE
)
1700 if (vect_print_dump_info (REPORT_DETAILS
))
1701 fprintf (vect_dump
, "function is not vectorizable.");
1706 gcc_assert (!gimple_vuse (stmt
));
1708 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1710 else if (modifier
== NARROW
)
1711 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1713 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1715 /* Sanity check: make sure that at least one copy of the vectorized stmt
1716 needs to be generated. */
1717 gcc_assert (ncopies
>= 1);
1719 if (!vec_stmt
) /* transformation not required. */
1721 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1722 if (vect_print_dump_info (REPORT_DETAILS
))
1723 fprintf (vect_dump
, "=== vectorizable_call ===");
1724 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
1730 if (vect_print_dump_info (REPORT_DETAILS
))
1731 fprintf (vect_dump
, "transform call.");
1734 scalar_dest
= gimple_call_lhs (stmt
);
1735 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1737 prev_stmt_info
= NULL
;
1741 for (j
= 0; j
< ncopies
; ++j
)
1743 /* Build argument list for the vectorized call. */
1745 vargs
= VEC_alloc (tree
, heap
, nargs
);
1747 VEC_truncate (tree
, vargs
, 0);
1751 VEC (slp_void_p
, heap
) *vec_defs
1752 = VEC_alloc (slp_void_p
, heap
, nargs
);
1753 VEC (tree
, heap
) *vec_oprnds0
;
1755 for (i
= 0; i
< nargs
; i
++)
1756 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1757 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1759 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1761 /* Arguments are ready. Create the new vector stmt. */
1762 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_oprnd0
)
1765 for (k
= 0; k
< nargs
; k
++)
1767 VEC (tree
, heap
) *vec_oprndsk
1768 = (VEC (tree
, heap
) *)
1769 VEC_index (slp_void_p
, vec_defs
, k
);
1770 VEC_replace (tree
, vargs
, k
,
1771 VEC_index (tree
, vec_oprndsk
, i
));
1773 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1774 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1775 gimple_call_set_lhs (new_stmt
, new_temp
);
1776 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1777 mark_symbols_for_renaming (new_stmt
);
1778 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1782 for (i
= 0; i
< nargs
; i
++)
1784 VEC (tree
, heap
) *vec_oprndsi
1785 = (VEC (tree
, heap
) *)
1786 VEC_index (slp_void_p
, vec_defs
, i
);
1787 VEC_free (tree
, heap
, vec_oprndsi
);
1789 VEC_free (slp_void_p
, heap
, vec_defs
);
1793 for (i
= 0; i
< nargs
; i
++)
1795 op
= gimple_call_arg (stmt
, i
);
1798 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1801 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1803 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1806 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1809 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1810 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1811 gimple_call_set_lhs (new_stmt
, new_temp
);
1813 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1814 mark_symbols_for_renaming (new_stmt
);
1817 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1819 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1821 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1827 for (j
= 0; j
< ncopies
; ++j
)
1829 /* Build argument list for the vectorized call. */
1831 vargs
= VEC_alloc (tree
, heap
, nargs
* 2);
1833 VEC_truncate (tree
, vargs
, 0);
1837 VEC (slp_void_p
, heap
) *vec_defs
1838 = VEC_alloc (slp_void_p
, heap
, nargs
);
1839 VEC (tree
, heap
) *vec_oprnds0
;
1841 for (i
= 0; i
< nargs
; i
++)
1842 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1843 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1845 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1847 /* Arguments are ready. Create the new vector stmt. */
1848 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vec_oprnd0
);
1852 VEC_truncate (tree
, vargs
, 0);
1853 for (k
= 0; k
< nargs
; k
++)
1855 VEC (tree
, heap
) *vec_oprndsk
1856 = (VEC (tree
, heap
) *)
1857 VEC_index (slp_void_p
, vec_defs
, k
);
1858 VEC_quick_push (tree
, vargs
,
1859 VEC_index (tree
, vec_oprndsk
, i
));
1860 VEC_quick_push (tree
, vargs
,
1861 VEC_index (tree
, vec_oprndsk
, i
+ 1));
1863 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1864 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1865 gimple_call_set_lhs (new_stmt
, new_temp
);
1866 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1867 mark_symbols_for_renaming (new_stmt
);
1868 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1872 for (i
= 0; i
< nargs
; i
++)
1874 VEC (tree
, heap
) *vec_oprndsi
1875 = (VEC (tree
, heap
) *)
1876 VEC_index (slp_void_p
, vec_defs
, i
);
1877 VEC_free (tree
, heap
, vec_oprndsi
);
1879 VEC_free (slp_void_p
, heap
, vec_defs
);
1883 for (i
= 0; i
< nargs
; i
++)
1885 op
= gimple_call_arg (stmt
, i
);
1889 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1891 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1895 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
1897 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
1899 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1902 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1903 VEC_quick_push (tree
, vargs
, vec_oprnd1
);
1906 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1907 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1908 gimple_call_set_lhs (new_stmt
, new_temp
);
1910 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1911 mark_symbols_for_renaming (new_stmt
);
1914 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
1916 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1918 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1921 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
1926 /* No current target implements this case. */
1930 VEC_free (tree
, heap
, vargs
);
1932 /* Update the exception handling table with the vector stmt if necessary. */
1933 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
1934 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
1936 /* The call in STMT might prevent it from being removed in dce.
1937 We however cannot remove it here, due to the way the ssa name
1938 it defines is mapped to the new definition. So just replace
1939 rhs of the statement with something harmless. */
1944 type
= TREE_TYPE (scalar_dest
);
1945 if (is_pattern_stmt_p (stmt_info
))
1946 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
1948 lhs
= gimple_call_lhs (stmt
);
1949 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
1950 set_vinfo_for_stmt (new_stmt
, stmt_info
);
1951 set_vinfo_for_stmt (stmt
, NULL
);
1952 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
1953 gsi_replace (gsi
, new_stmt
, false);
1954 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
1960 /* Function vect_gen_widened_results_half
1962 Create a vector stmt whose code, type, number of arguments, and result
1963 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1964 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1965 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1966 needs to be created (DECL is a function-decl of a target-builtin).
1967 STMT is the original scalar stmt that we are vectorizing. */
1970 vect_gen_widened_results_half (enum tree_code code
,
1972 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
1973 tree vec_dest
, gimple_stmt_iterator
*gsi
,
1979 /* Generate half of the widened result: */
1980 if (code
== CALL_EXPR
)
1982 /* Target specific support */
1983 if (op_type
== binary_op
)
1984 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
1986 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
1987 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1988 gimple_call_set_lhs (new_stmt
, new_temp
);
1992 /* Generic support */
1993 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
1994 if (op_type
!= binary_op
)
1996 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
1998 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1999 gimple_assign_set_lhs (new_stmt
, new_temp
);
2001 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2007 /* Get vectorized definitions for loop-based vectorization. For the first
2008 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2009 scalar operand), and for the rest we get a copy with
2010 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2011 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2012 The vectors are collected into VEC_OPRNDS. */
2015 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2016 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
2020 /* Get first vector operand. */
2021 /* All the vector operands except the very first one (that is scalar oprnd)
2023 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2024 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2026 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2028 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2030 /* Get second vector operand. */
2031 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2032 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2036 /* For conversion in multiple steps, continue to get operands
2039 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2044 For multi-step conversions store the resulting vectors and call the function
2048 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
2049 int multi_step_cvt
, gimple stmt
,
2050 VEC (tree
, heap
) *vec_dsts
,
2051 gimple_stmt_iterator
*gsi
,
2052 slp_tree slp_node
, enum tree_code code
,
2053 stmt_vec_info
*prev_stmt_info
)
2056 tree vop0
, vop1
, new_tmp
, vec_dest
;
2058 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2060 vec_dest
= VEC_pop (tree
, vec_dsts
);
2062 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
2064 /* Create demotion operation. */
2065 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
2066 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
2067 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2068 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2069 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2070 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2073 /* Store the resulting vector for next recursive call. */
2074 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
2077 /* This is the last step of the conversion sequence. Store the
2078 vectors in SLP_NODE or in vector info of the scalar statement
2079 (or in STMT_VINFO_RELATED_STMT chain). */
2081 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2084 if (!*prev_stmt_info
)
2085 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2087 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2089 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2094 /* For multi-step demotion operations we first generate demotion operations
2095 from the source type to the intermediate types, and then combine the
2096 results (stored in VEC_OPRNDS) in demotion operation to the destination
2100 /* At each level of recursion we have half of the operands we had at the
2102 VEC_truncate (tree
, *vec_oprnds
, (i
+1)/2);
2103 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2104 stmt
, vec_dsts
, gsi
, slp_node
,
2105 VEC_PACK_TRUNC_EXPR
,
2109 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2114 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2115 the resulting vectors and call the function recursively. */
2118 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
2119 VEC (tree
, heap
) **vec_oprnds1
,
2120 gimple stmt
, tree vec_dest
,
2121 gimple_stmt_iterator
*gsi
,
2122 enum tree_code code1
,
2123 enum tree_code code2
, tree decl1
,
2124 tree decl2
, int op_type
)
2127 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2128 gimple new_stmt1
, new_stmt2
;
2129 VEC (tree
, heap
) *vec_tmp
= NULL
;
2131 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
2132 FOR_EACH_VEC_ELT (tree
, *vec_oprnds0
, i
, vop0
)
2134 if (op_type
== binary_op
)
2135 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
2139 /* Generate the two halves of promotion operation. */
2140 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2141 op_type
, vec_dest
, gsi
, stmt
);
2142 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2143 op_type
, vec_dest
, gsi
, stmt
);
2144 if (is_gimple_call (new_stmt1
))
2146 new_tmp1
= gimple_call_lhs (new_stmt1
);
2147 new_tmp2
= gimple_call_lhs (new_stmt2
);
2151 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2152 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2155 /* Store the results for the next step. */
2156 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
2157 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
2160 VEC_free (tree
, heap
, *vec_oprnds0
);
2161 *vec_oprnds0
= vec_tmp
;
2165 /* Check if STMT performs a conversion operation, that can be vectorized.
2166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2167 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2171 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2172 gimple
*vec_stmt
, slp_tree slp_node
)
2176 tree op0
, op1
= NULL_TREE
;
2177 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2178 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2179 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2180 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2181 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2182 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2186 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2187 gimple new_stmt
= NULL
;
2188 stmt_vec_info prev_stmt_info
;
2191 tree vectype_out
, vectype_in
;
2193 tree lhs_type
, rhs_type
;
2194 enum { NARROW
, NONE
, WIDEN
} modifier
;
2195 VEC (tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2197 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2198 int multi_step_cvt
= 0;
2199 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
;
2200 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2202 enum machine_mode rhs_mode
;
2203 unsigned short fltsz
;
2205 /* Is STMT a vectorizable conversion? */
2207 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2210 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2213 if (!is_gimple_assign (stmt
))
2216 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2219 code
= gimple_assign_rhs_code (stmt
);
2220 if (!CONVERT_EXPR_CODE_P (code
)
2221 && code
!= FIX_TRUNC_EXPR
2222 && code
!= FLOAT_EXPR
2223 && code
!= WIDEN_MULT_EXPR
2224 && code
!= WIDEN_LSHIFT_EXPR
)
2227 op_type
= TREE_CODE_LENGTH (code
);
2229 /* Check types of lhs and rhs. */
2230 scalar_dest
= gimple_assign_lhs (stmt
);
2231 lhs_type
= TREE_TYPE (scalar_dest
);
2232 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2234 op0
= gimple_assign_rhs1 (stmt
);
2235 rhs_type
= TREE_TYPE (op0
);
2237 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2238 && !((INTEGRAL_TYPE_P (lhs_type
)
2239 && INTEGRAL_TYPE_P (rhs_type
))
2240 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2241 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2244 if ((INTEGRAL_TYPE_P (lhs_type
)
2245 && (TYPE_PRECISION (lhs_type
)
2246 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2247 || (INTEGRAL_TYPE_P (rhs_type
)
2248 && (TYPE_PRECISION (rhs_type
)
2249 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2251 if (vect_print_dump_info (REPORT_DETAILS
))
2253 "type conversion to/from bit-precision unsupported.");
2257 /* Check the operands of the operation. */
2258 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2259 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2261 if (vect_print_dump_info (REPORT_DETAILS
))
2262 fprintf (vect_dump
, "use not simple.");
2265 if (op_type
== binary_op
)
2269 op1
= gimple_assign_rhs2 (stmt
);
2270 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2271 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2273 if (CONSTANT_CLASS_P (op0
))
2274 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, NULL
,
2275 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2277 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, NULL
, &def_stmt
,
2282 if (vect_print_dump_info (REPORT_DETAILS
))
2283 fprintf (vect_dump
, "use not simple.");
2288 /* If op0 is an external or constant defs use a vector type of
2289 the same size as the output vector type. */
2291 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2293 gcc_assert (vectype_in
);
2296 if (vect_print_dump_info (REPORT_DETAILS
))
2298 fprintf (vect_dump
, "no vectype for scalar type ");
2299 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
2305 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2306 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2307 if (nunits_in
< nunits_out
)
2309 else if (nunits_out
== nunits_in
)
2314 /* Multiple types in SLP are handled by creating the appropriate number of
2315 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2317 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2319 else if (modifier
== NARROW
)
2320 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2322 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2324 /* Sanity check: make sure that at least one copy of the vectorized stmt
2325 needs to be generated. */
2326 gcc_assert (ncopies
>= 1);
2328 /* Supportable by target? */
2332 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2334 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2339 if (vect_print_dump_info (REPORT_DETAILS
))
2340 fprintf (vect_dump
, "conversion not supported by target.");
2344 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2345 &decl1
, &decl2
, &code1
, &code2
,
2346 &multi_step_cvt
, &interm_types
))
2348 /* Binary widening operation can only be supported directly by the
2350 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2354 if (code
!= FLOAT_EXPR
2355 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2356 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2359 rhs_mode
= TYPE_MODE (rhs_type
);
2360 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2361 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2362 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2363 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2366 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2367 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2368 if (cvt_type
== NULL_TREE
)
2371 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2373 if (!supportable_convert_operation (code
, vectype_out
,
2374 cvt_type
, &decl1
, &codecvt1
))
2377 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2378 cvt_type
, &decl1
, &decl2
,
2379 &codecvt1
, &codecvt2
,
2384 gcc_assert (multi_step_cvt
== 0);
2386 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2387 vectype_in
, NULL
, NULL
, &code1
,
2388 &code2
, &multi_step_cvt
,
2393 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2396 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2397 codecvt2
= ERROR_MARK
;
2401 VEC_safe_push (tree
, heap
, interm_types
, cvt_type
);
2402 cvt_type
= NULL_TREE
;
2407 gcc_assert (op_type
== unary_op
);
2408 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2409 &code1
, &multi_step_cvt
,
2413 if (code
!= FIX_TRUNC_EXPR
2414 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2415 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2418 rhs_mode
= TYPE_MODE (rhs_type
);
2420 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2421 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2422 if (cvt_type
== NULL_TREE
)
2424 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2427 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2428 &code1
, &multi_step_cvt
,
2437 if (!vec_stmt
) /* transformation not required. */
2439 if (vect_print_dump_info (REPORT_DETAILS
))
2440 fprintf (vect_dump
, "=== vectorizable_conversion ===");
2441 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2443 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2444 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
2446 else if (modifier
== NARROW
)
2448 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2449 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2453 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2454 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2456 VEC_free (tree
, heap
, interm_types
);
2461 if (vect_print_dump_info (REPORT_DETAILS
))
2462 fprintf (vect_dump
, "transform conversion. ncopies = %d.", ncopies
);
2464 if (op_type
== binary_op
)
2466 if (CONSTANT_CLASS_P (op0
))
2467 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2468 else if (CONSTANT_CLASS_P (op1
))
2469 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2472 /* In case of multi-step conversion, we first generate conversion operations
2473 to the intermediate types, and then from that types to the final one.
2474 We create vector destinations for the intermediate type (TYPES) received
2475 from supportable_*_operation, and store them in the correct order
2476 for future use in vect_create_vectorized_*_stmts (). */
2477 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2478 vec_dest
= vect_create_destination_var (scalar_dest
,
2479 (cvt_type
&& modifier
== WIDEN
)
2480 ? cvt_type
: vectype_out
);
2481 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2485 for (i
= VEC_length (tree
, interm_types
) - 1;
2486 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2488 vec_dest
= vect_create_destination_var (scalar_dest
,
2490 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2495 vec_dest
= vect_create_destination_var (scalar_dest
,
2497 ? vectype_out
: cvt_type
);
2501 if (modifier
== NONE
)
2502 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2503 else if (modifier
== WIDEN
)
2505 vec_oprnds0
= VEC_alloc (tree
, heap
,
2507 ? vect_pow2 (multi_step_cvt
) : 1));
2508 if (op_type
== binary_op
)
2509 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2512 vec_oprnds0
= VEC_alloc (tree
, heap
,
2514 ? vect_pow2 (multi_step_cvt
) : 1));
2516 else if (code
== WIDEN_LSHIFT_EXPR
)
2517 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
2520 prev_stmt_info
= NULL
;
2524 for (j
= 0; j
< ncopies
; j
++)
2527 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2530 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2532 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2534 /* Arguments are ready, create the new vector stmt. */
2535 if (code1
== CALL_EXPR
)
2537 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2538 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2539 gimple_call_set_lhs (new_stmt
, new_temp
);
2543 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2544 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2546 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2547 gimple_assign_set_lhs (new_stmt
, new_temp
);
2550 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2552 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2557 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2560 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2565 /* In case the vectorization factor (VF) is bigger than the number
2566 of elements that we can fit in a vectype (nunits), we have to
2567 generate more than one vector stmt - i.e - we need to "unroll"
2568 the vector stmt by a factor VF/nunits. */
2569 for (j
= 0; j
< ncopies
; j
++)
2576 if (code
== WIDEN_LSHIFT_EXPR
)
2581 /* Store vec_oprnd1 for every vector stmt to be created
2582 for SLP_NODE. We check during the analysis that all
2583 the shift arguments are the same. */
2584 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2585 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2587 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2591 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2592 &vec_oprnds1
, slp_node
, -1);
2596 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2597 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2598 if (op_type
== binary_op
)
2600 if (code
== WIDEN_LSHIFT_EXPR
)
2603 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2605 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2611 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2612 VEC_truncate (tree
, vec_oprnds0
, 0);
2613 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2614 if (op_type
== binary_op
)
2616 if (code
== WIDEN_LSHIFT_EXPR
)
2619 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2621 VEC_truncate (tree
, vec_oprnds1
, 0);
2622 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2626 /* Arguments are ready. Create the new vector stmts. */
2627 for (i
= multi_step_cvt
; i
>= 0; i
--)
2629 tree this_dest
= VEC_index (tree
, vec_dsts
, i
);
2630 enum tree_code c1
= code1
, c2
= code2
;
2631 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2636 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2638 stmt
, this_dest
, gsi
,
2639 c1
, c2
, decl1
, decl2
,
2643 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2647 if (codecvt1
== CALL_EXPR
)
2649 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2650 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2651 gimple_call_set_lhs (new_stmt
, new_temp
);
2655 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2656 new_temp
= make_ssa_name (vec_dest
, NULL
);
2657 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2662 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2665 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2668 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2672 if (!prev_stmt_info
)
2673 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2675 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2676 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2681 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2685 /* In case the vectorization factor (VF) is bigger than the number
2686 of elements that we can fit in a vectype (nunits), we have to
2687 generate more than one vector stmt - i.e - we need to "unroll"
2688 the vector stmt by a factor VF/nunits. */
2689 for (j
= 0; j
< ncopies
; j
++)
2693 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2697 VEC_truncate (tree
, vec_oprnds0
, 0);
2698 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2699 vect_pow2 (multi_step_cvt
) - 1);
2702 /* Arguments are ready. Create the new vector stmts. */
2704 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2706 if (codecvt1
== CALL_EXPR
)
2708 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2709 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2710 gimple_call_set_lhs (new_stmt
, new_temp
);
2714 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2715 new_temp
= make_ssa_name (vec_dest
, NULL
);
2716 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2720 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2721 VEC_replace (tree
, vec_oprnds0
, i
, new_temp
);
2724 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2725 stmt
, vec_dsts
, gsi
,
2730 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2734 VEC_free (tree
, heap
, vec_oprnds0
);
2735 VEC_free (tree
, heap
, vec_oprnds1
);
2736 VEC_free (tree
, heap
, vec_dsts
);
2737 VEC_free (tree
, heap
, interm_types
);
2743 /* Function vectorizable_assignment.
2745 Check if STMT performs an assignment (copy) that can be vectorized.
2746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2747 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2748 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2751 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2752 gimple
*vec_stmt
, slp_tree slp_node
)
2757 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2758 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2759 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2763 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2764 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2767 VEC(tree
,heap
) *vec_oprnds
= NULL
;
2769 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2770 gimple new_stmt
= NULL
;
2771 stmt_vec_info prev_stmt_info
= NULL
;
2772 enum tree_code code
;
2775 /* Multiple types in SLP are handled by creating the appropriate number of
2776 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2778 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2781 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2783 gcc_assert (ncopies
>= 1);
2785 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2788 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2791 /* Is vectorizable assignment? */
2792 if (!is_gimple_assign (stmt
))
2795 scalar_dest
= gimple_assign_lhs (stmt
);
2796 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2799 code
= gimple_assign_rhs_code (stmt
);
2800 if (gimple_assign_single_p (stmt
)
2801 || code
== PAREN_EXPR
2802 || CONVERT_EXPR_CODE_P (code
))
2803 op
= gimple_assign_rhs1 (stmt
);
2807 if (code
== VIEW_CONVERT_EXPR
)
2808 op
= TREE_OPERAND (op
, 0);
2810 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2811 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2813 if (vect_print_dump_info (REPORT_DETAILS
))
2814 fprintf (vect_dump
, "use not simple.");
2818 /* We can handle NOP_EXPR conversions that do not change the number
2819 of elements or the vector size. */
2820 if ((CONVERT_EXPR_CODE_P (code
)
2821 || code
== VIEW_CONVERT_EXPR
)
2823 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2824 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2825 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
2828 /* We do not handle bit-precision changes. */
2829 if ((CONVERT_EXPR_CODE_P (code
)
2830 || code
== VIEW_CONVERT_EXPR
)
2831 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2832 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2833 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2834 || ((TYPE_PRECISION (TREE_TYPE (op
))
2835 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
2836 /* But a conversion that does not change the bit-pattern is ok. */
2837 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2838 > TYPE_PRECISION (TREE_TYPE (op
)))
2839 && TYPE_UNSIGNED (TREE_TYPE (op
))))
2841 if (vect_print_dump_info (REPORT_DETAILS
))
2842 fprintf (vect_dump
, "type conversion to/from bit-precision "
2847 if (!vec_stmt
) /* transformation not required. */
2849 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
2850 if (vect_print_dump_info (REPORT_DETAILS
))
2851 fprintf (vect_dump
, "=== vectorizable_assignment ===");
2852 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
2857 if (vect_print_dump_info (REPORT_DETAILS
))
2858 fprintf (vect_dump
, "transform assignment.");
2861 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2864 for (j
= 0; j
< ncopies
; j
++)
2868 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2870 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2872 /* Arguments are ready. create the new vector stmt. */
2873 FOR_EACH_VEC_ELT (tree
, vec_oprnds
, i
, vop
)
2875 if (CONVERT_EXPR_CODE_P (code
)
2876 || code
== VIEW_CONVERT_EXPR
)
2877 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
2878 new_stmt
= gimple_build_assign (vec_dest
, vop
);
2879 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2880 gimple_assign_set_lhs (new_stmt
, new_temp
);
2881 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2883 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2890 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2892 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2894 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2897 VEC_free (tree
, heap
, vec_oprnds
);
2902 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2903 either as shift by a scalar or by a vector. */
2906 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
2909 enum machine_mode vec_mode
;
2914 vectype
= get_vectype_for_scalar_type (scalar_type
);
2918 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
2920 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2922 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2924 || (optab_handler (optab
, TYPE_MODE (vectype
))
2925 == CODE_FOR_nothing
))
2929 vec_mode
= TYPE_MODE (vectype
);
2930 icode
= (int) optab_handler (optab
, vec_mode
);
2931 if (icode
== CODE_FOR_nothing
)
2938 /* Function vectorizable_shift.
2940 Check if STMT performs a shift operation that can be vectorized.
2941 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2942 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2943 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2946 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
2947 gimple
*vec_stmt
, slp_tree slp_node
)
2951 tree op0
, op1
= NULL
;
2952 tree vec_oprnd1
= NULL_TREE
;
2953 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2955 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2956 enum tree_code code
;
2957 enum machine_mode vec_mode
;
2961 enum machine_mode optab_op2_mode
;
2964 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2965 gimple new_stmt
= NULL
;
2966 stmt_vec_info prev_stmt_info
;
2973 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2976 bool scalar_shift_arg
= true;
2977 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2980 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2983 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2986 /* Is STMT a vectorizable binary/unary operation? */
2987 if (!is_gimple_assign (stmt
))
2990 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2993 code
= gimple_assign_rhs_code (stmt
);
2995 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
2996 || code
== RROTATE_EXPR
))
2999 scalar_dest
= gimple_assign_lhs (stmt
);
3000 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3001 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3002 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3004 if (vect_print_dump_info (REPORT_DETAILS
))
3005 fprintf (vect_dump
, "bit-precision shifts not supported.");
3009 op0
= gimple_assign_rhs1 (stmt
);
3010 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3011 &def_stmt
, &def
, &dt
[0], &vectype
))
3013 if (vect_print_dump_info (REPORT_DETAILS
))
3014 fprintf (vect_dump
, "use not simple.");
3017 /* If op0 is an external or constant def use a vector type with
3018 the same size as the output vector type. */
3020 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3022 gcc_assert (vectype
);
3025 if (vect_print_dump_info (REPORT_DETAILS
))
3027 fprintf (vect_dump
, "no vectype for scalar type ");
3028 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3034 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3035 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3036 if (nunits_out
!= nunits_in
)
3039 op1
= gimple_assign_rhs2 (stmt
);
3040 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3041 &def
, &dt
[1], &op1_vectype
))
3043 if (vect_print_dump_info (REPORT_DETAILS
))
3044 fprintf (vect_dump
, "use not simple.");
3049 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3053 /* Multiple types in SLP are handled by creating the appropriate number of
3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3056 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3059 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3061 gcc_assert (ncopies
>= 1);
3063 /* Determine whether the shift amount is a vector, or scalar. If the
3064 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3066 if (dt
[1] == vect_internal_def
&& !slp_node
)
3067 scalar_shift_arg
= false;
3068 else if (dt
[1] == vect_constant_def
3069 || dt
[1] == vect_external_def
3070 || dt
[1] == vect_internal_def
)
3072 /* In SLP, need to check whether the shift count is the same,
3073 in loops if it is a constant or invariant, it is always
3077 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3080 FOR_EACH_VEC_ELT (gimple
, stmts
, k
, slpstmt
)
3081 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3082 scalar_shift_arg
= false;
3087 if (vect_print_dump_info (REPORT_DETAILS
))
3088 fprintf (vect_dump
, "operand mode requires invariant argument.");
3092 /* Vector shifted by vector. */
3093 if (!scalar_shift_arg
)
3095 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3096 if (vect_print_dump_info (REPORT_DETAILS
))
3097 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3099 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3100 if (op1_vectype
== NULL_TREE
3101 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3103 if (vect_print_dump_info (REPORT_DETAILS
))
3104 fprintf (vect_dump
, "unusable type for last operand in"
3105 " vector/vector shift/rotate.");
3109 /* See if the machine has a vector shifted by scalar insn and if not
3110 then see if it has a vector shifted by vector insn. */
3113 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3115 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3117 if (vect_print_dump_info (REPORT_DETAILS
))
3118 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
3122 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3124 && (optab_handler (optab
, TYPE_MODE (vectype
))
3125 != CODE_FOR_nothing
))
3127 scalar_shift_arg
= false;
3129 if (vect_print_dump_info (REPORT_DETAILS
))
3130 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3132 /* Unlike the other binary operators, shifts/rotates have
3133 the rhs being int, instead of the same type as the lhs,
3134 so make sure the scalar is the right type if we are
3135 dealing with vectors of long long/long/short/char. */
3136 if (dt
[1] == vect_constant_def
)
3137 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3138 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3142 && TYPE_MODE (TREE_TYPE (vectype
))
3143 != TYPE_MODE (TREE_TYPE (op1
)))
3145 if (vect_print_dump_info (REPORT_DETAILS
))
3146 fprintf (vect_dump
, "unusable type for last operand in"
3147 " vector/vector shift/rotate.");
3150 if (vec_stmt
&& !slp_node
)
3152 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3153 op1
= vect_init_vector (stmt
, op1
,
3154 TREE_TYPE (vectype
), NULL
);
3161 /* Supportable by target? */
3164 if (vect_print_dump_info (REPORT_DETAILS
))
3165 fprintf (vect_dump
, "no optab.");
3168 vec_mode
= TYPE_MODE (vectype
);
3169 icode
= (int) optab_handler (optab
, vec_mode
);
3170 if (icode
== CODE_FOR_nothing
)
3172 if (vect_print_dump_info (REPORT_DETAILS
))
3173 fprintf (vect_dump
, "op not supported by target.");
3174 /* Check only during analysis. */
3175 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3176 || (vf
< vect_min_worthwhile_factor (code
)
3179 if (vect_print_dump_info (REPORT_DETAILS
))
3180 fprintf (vect_dump
, "proceeding using word mode.");
3183 /* Worthwhile without SIMD support? Check only during analysis. */
3184 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3185 && vf
< vect_min_worthwhile_factor (code
)
3188 if (vect_print_dump_info (REPORT_DETAILS
))
3189 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3193 if (!vec_stmt
) /* transformation not required. */
3195 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3196 if (vect_print_dump_info (REPORT_DETAILS
))
3197 fprintf (vect_dump
, "=== vectorizable_shift ===");
3198 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3204 if (vect_print_dump_info (REPORT_DETAILS
))
3205 fprintf (vect_dump
, "transform binary/unary operation.");
3208 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3210 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3211 created in the previous stages of the recursion, so no allocation is
3212 needed, except for the case of shift with scalar shift argument. In that
3213 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3214 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3215 In case of loop-based vectorization we allocate VECs of size 1. We
3216 allocate VEC_OPRNDS1 only in case of binary operation. */
3219 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3220 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3222 else if (scalar_shift_arg
)
3223 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
3225 prev_stmt_info
= NULL
;
3226 for (j
= 0; j
< ncopies
; j
++)
3231 if (scalar_shift_arg
)
3233 /* Vector shl and shr insn patterns can be defined with scalar
3234 operand 2 (shift operand). In this case, use constant or loop
3235 invariant op1 directly, without extending it to vector mode
3237 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3238 if (!VECTOR_MODE_P (optab_op2_mode
))
3240 if (vect_print_dump_info (REPORT_DETAILS
))
3241 fprintf (vect_dump
, "operand 1 using scalar mode.");
3243 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3246 /* Store vec_oprnd1 for every vector stmt to be created
3247 for SLP_NODE. We check during the analysis that all
3248 the shift arguments are the same.
3249 TODO: Allow different constants for different vector
3250 stmts generated for an SLP instance. */
3251 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3252 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3257 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3258 (a special case for certain kind of vector shifts); otherwise,
3259 operand 1 should be of a vector type (the usual case). */
3261 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3264 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3268 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3270 /* Arguments are ready. Create the new vector stmt. */
3271 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3273 vop1
= VEC_index (tree
, vec_oprnds1
, i
);
3274 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3275 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3276 gimple_assign_set_lhs (new_stmt
, new_temp
);
3277 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3279 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3286 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3288 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3289 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3292 VEC_free (tree
, heap
, vec_oprnds0
);
3293 VEC_free (tree
, heap
, vec_oprnds1
);
3299 /* Function vectorizable_operation.
3301 Check if STMT performs a binary, unary or ternary operation that can
3303 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3304 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3305 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3308 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3309 gimple
*vec_stmt
, slp_tree slp_node
)
3313 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3314 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3316 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3317 enum tree_code code
;
3318 enum machine_mode vec_mode
;
3325 enum vect_def_type dt
[3]
3326 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3327 gimple new_stmt
= NULL
;
3328 stmt_vec_info prev_stmt_info
;
3334 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vec_oprnds2
= NULL
;
3335 tree vop0
, vop1
, vop2
;
3336 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3339 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3342 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3345 /* Is STMT a vectorizable binary/unary operation? */
3346 if (!is_gimple_assign (stmt
))
3349 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3352 code
= gimple_assign_rhs_code (stmt
);
3354 /* For pointer addition, we should use the normal plus for
3355 the vector addition. */
3356 if (code
== POINTER_PLUS_EXPR
)
3359 /* Support only unary or binary operations. */
3360 op_type
= TREE_CODE_LENGTH (code
);
3361 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3363 if (vect_print_dump_info (REPORT_DETAILS
))
3364 fprintf (vect_dump
, "num. args = %d (not unary/binary/ternary op).",
3369 scalar_dest
= gimple_assign_lhs (stmt
);
3370 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3372 /* Most operations cannot handle bit-precision types without extra
3374 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3375 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3376 /* Exception are bitwise binary operations. */
3377 && code
!= BIT_IOR_EXPR
3378 && code
!= BIT_XOR_EXPR
3379 && code
!= BIT_AND_EXPR
)
3381 if (vect_print_dump_info (REPORT_DETAILS
))
3382 fprintf (vect_dump
, "bit-precision arithmetic not supported.");
3386 op0
= gimple_assign_rhs1 (stmt
);
3387 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3388 &def_stmt
, &def
, &dt
[0], &vectype
))
3390 if (vect_print_dump_info (REPORT_DETAILS
))
3391 fprintf (vect_dump
, "use not simple.");
3394 /* If op0 is an external or constant def use a vector type with
3395 the same size as the output vector type. */
3397 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3399 gcc_assert (vectype
);
3402 if (vect_print_dump_info (REPORT_DETAILS
))
3404 fprintf (vect_dump
, "no vectype for scalar type ");
3405 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3411 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3412 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3413 if (nunits_out
!= nunits_in
)
3416 if (op_type
== binary_op
|| op_type
== ternary_op
)
3418 op1
= gimple_assign_rhs2 (stmt
);
3419 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3422 if (vect_print_dump_info (REPORT_DETAILS
))
3423 fprintf (vect_dump
, "use not simple.");
3427 if (op_type
== ternary_op
)
3429 op2
= gimple_assign_rhs3 (stmt
);
3430 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3433 if (vect_print_dump_info (REPORT_DETAILS
))
3434 fprintf (vect_dump
, "use not simple.");
3440 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3444 /* Multiple types in SLP are handled by creating the appropriate number of
3445 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3447 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3450 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3452 gcc_assert (ncopies
>= 1);
3454 /* Shifts are handled in vectorizable_shift (). */
3455 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3456 || code
== RROTATE_EXPR
)
3459 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3461 /* Supportable by target? */
3464 if (vect_print_dump_info (REPORT_DETAILS
))
3465 fprintf (vect_dump
, "no optab.");
3468 vec_mode
= TYPE_MODE (vectype
);
3469 icode
= (int) optab_handler (optab
, vec_mode
);
3470 if (icode
== CODE_FOR_nothing
)
3472 if (vect_print_dump_info (REPORT_DETAILS
))
3473 fprintf (vect_dump
, "op not supported by target.");
3474 /* Check only during analysis. */
3475 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3476 || (vf
< vect_min_worthwhile_factor (code
)
3479 if (vect_print_dump_info (REPORT_DETAILS
))
3480 fprintf (vect_dump
, "proceeding using word mode.");
3483 /* Worthwhile without SIMD support? Check only during analysis. */
3484 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3485 && vf
< vect_min_worthwhile_factor (code
)
3488 if (vect_print_dump_info (REPORT_DETAILS
))
3489 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3493 if (!vec_stmt
) /* transformation not required. */
3495 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3496 if (vect_print_dump_info (REPORT_DETAILS
))
3497 fprintf (vect_dump
, "=== vectorizable_operation ===");
3498 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3504 if (vect_print_dump_info (REPORT_DETAILS
))
3505 fprintf (vect_dump
, "transform binary/unary operation.");
3508 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3510 /* In case the vectorization factor (VF) is bigger than the number
3511 of elements that we can fit in a vectype (nunits), we have to generate
3512 more than one vector stmt - i.e - we need to "unroll" the
3513 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3514 from one copy of the vector stmt to the next, in the field
3515 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3516 stages to find the correct vector defs to be used when vectorizing
3517 stmts that use the defs of the current stmt. The example below
3518 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3519 we need to create 4 vectorized stmts):
3521 before vectorization:
3522 RELATED_STMT VEC_STMT
3526 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3528 RELATED_STMT VEC_STMT
3529 VS1_0: vx0 = memref0 VS1_1 -
3530 VS1_1: vx1 = memref1 VS1_2 -
3531 VS1_2: vx2 = memref2 VS1_3 -
3532 VS1_3: vx3 = memref3 - -
3533 S1: x = load - VS1_0
3536 step2: vectorize stmt S2 (done here):
3537 To vectorize stmt S2 we first need to find the relevant vector
3538 def for the first operand 'x'. This is, as usual, obtained from
3539 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3540 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3541 relevant vector def 'vx0'. Having found 'vx0' we can generate
3542 the vector stmt VS2_0, and as usual, record it in the
3543 STMT_VINFO_VEC_STMT of stmt S2.
3544 When creating the second copy (VS2_1), we obtain the relevant vector
3545 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3546 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3547 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3548 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3549 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3550 chain of stmts and pointers:
3551 RELATED_STMT VEC_STMT
3552 VS1_0: vx0 = memref0 VS1_1 -
3553 VS1_1: vx1 = memref1 VS1_2 -
3554 VS1_2: vx2 = memref2 VS1_3 -
3555 VS1_3: vx3 = memref3 - -
3556 S1: x = load - VS1_0
3557 VS2_0: vz0 = vx0 + v1 VS2_1 -
3558 VS2_1: vz1 = vx1 + v1 VS2_2 -
3559 VS2_2: vz2 = vx2 + v1 VS2_3 -
3560 VS2_3: vz3 = vx3 + v1 - -
3561 S2: z = x + 1 - VS2_0 */
3563 prev_stmt_info
= NULL
;
3564 for (j
= 0; j
< ncopies
; j
++)
3569 if (op_type
== binary_op
|| op_type
== ternary_op
)
3570 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3573 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3575 if (op_type
== ternary_op
)
3577 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3578 VEC_quick_push (tree
, vec_oprnds2
,
3579 vect_get_vec_def_for_operand (op2
, stmt
, NULL
));
3584 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3585 if (op_type
== ternary_op
)
3587 tree vec_oprnd
= VEC_pop (tree
, vec_oprnds2
);
3588 VEC_quick_push (tree
, vec_oprnds2
,
3589 vect_get_vec_def_for_stmt_copy (dt
[2],
3594 /* Arguments are ready. Create the new vector stmt. */
3595 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3597 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3598 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL_TREE
);
3599 vop2
= ((op_type
== ternary_op
)
3600 ? VEC_index (tree
, vec_oprnds2
, i
) : NULL_TREE
);
3601 new_stmt
= gimple_build_assign_with_ops3 (code
, vec_dest
,
3603 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3604 gimple_assign_set_lhs (new_stmt
, new_temp
);
3605 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3607 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3614 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3616 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3617 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3620 VEC_free (tree
, heap
, vec_oprnds0
);
3622 VEC_free (tree
, heap
, vec_oprnds1
);
3624 VEC_free (tree
, heap
, vec_oprnds2
);
3630 /* Function vectorizable_store.
3632 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3634 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3635 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3636 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3639 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3645 tree vec_oprnd
= NULL_TREE
;
3646 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3647 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3648 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3650 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3651 struct loop
*loop
= NULL
;
3652 enum machine_mode vec_mode
;
3654 enum dr_alignment_support alignment_support_scheme
;
3657 enum vect_def_type dt
;
3658 stmt_vec_info prev_stmt_info
= NULL
;
3659 tree dataref_ptr
= NULL_TREE
;
3660 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3663 gimple next_stmt
, first_stmt
= NULL
;
3664 bool strided_store
= false;
3665 bool store_lanes_p
= false;
3666 unsigned int group_size
, i
;
3667 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3669 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3670 bool slp
= (slp_node
!= NULL
);
3671 unsigned int vec_num
;
3672 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3676 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3678 /* Multiple types in SLP are handled by creating the appropriate number of
3679 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3681 if (slp
|| PURE_SLP_STMT (stmt_info
))
3684 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3686 gcc_assert (ncopies
>= 1);
3688 /* FORNOW. This restriction should be relaxed. */
3689 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3691 if (vect_print_dump_info (REPORT_DETAILS
))
3692 fprintf (vect_dump
, "multiple types in nested loop.");
3696 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3699 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3702 /* Is vectorizable store? */
3704 if (!is_gimple_assign (stmt
))
3707 scalar_dest
= gimple_assign_lhs (stmt
);
3708 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3709 && is_pattern_stmt_p (stmt_info
))
3710 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3711 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3712 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3713 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3714 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3715 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3716 && TREE_CODE (scalar_dest
) != MEM_REF
)
3719 gcc_assert (gimple_assign_single_p (stmt
));
3720 op
= gimple_assign_rhs1 (stmt
);
3721 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3724 if (vect_print_dump_info (REPORT_DETAILS
))
3725 fprintf (vect_dump
, "use not simple.");
3729 elem_type
= TREE_TYPE (vectype
);
3730 vec_mode
= TYPE_MODE (vectype
);
3732 /* FORNOW. In some cases can vectorize even if data-type not supported
3733 (e.g. - array initialization with 0). */
3734 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3737 if (!STMT_VINFO_DATA_REF (stmt_info
))
3740 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3741 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3742 size_zero_node
) < 0)
3744 if (vect_print_dump_info (REPORT_DETAILS
))
3745 fprintf (vect_dump
, "negative step for store.");
3749 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
3751 strided_store
= true;
3752 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3753 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3755 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3756 if (vect_store_lanes_supported (vectype
, group_size
))
3757 store_lanes_p
= true;
3758 else if (!vect_strided_store_supported (vectype
, group_size
))
3762 if (first_stmt
== stmt
)
3764 /* STMT is the leader of the group. Check the operands of all the
3765 stmts of the group. */
3766 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3769 gcc_assert (gimple_assign_single_p (next_stmt
));
3770 op
= gimple_assign_rhs1 (next_stmt
);
3771 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3772 &def_stmt
, &def
, &dt
))
3774 if (vect_print_dump_info (REPORT_DETAILS
))
3775 fprintf (vect_dump
, "use not simple.");
3778 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3783 if (!vec_stmt
) /* transformation not required. */
3785 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3786 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
, NULL
);
3794 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3795 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3797 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3800 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3802 /* We vectorize all the stmts of the interleaving group when we
3803 reach the last stmt in the group. */
3804 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3805 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3814 strided_store
= false;
3815 /* VEC_NUM is the number of vect stmts to be created for this
3817 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3818 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3819 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3820 op
= gimple_assign_rhs1 (first_stmt
);
3823 /* VEC_NUM is the number of vect stmts to be created for this
3825 vec_num
= group_size
;
3831 group_size
= vec_num
= 1;
3834 if (vect_print_dump_info (REPORT_DETAILS
))
3835 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3837 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3838 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3840 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3841 gcc_assert (alignment_support_scheme
);
3842 /* Targets with store-lane instructions must not require explicit
3844 gcc_assert (!store_lanes_p
3845 || alignment_support_scheme
== dr_aligned
3846 || alignment_support_scheme
== dr_unaligned_supported
);
3849 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
3851 aggr_type
= vectype
;
3853 /* In case the vectorization factor (VF) is bigger than the number
3854 of elements that we can fit in a vectype (nunits), we have to generate
3855 more than one vector stmt - i.e - we need to "unroll" the
3856 vector stmt by a factor VF/nunits. For more details see documentation in
3857 vect_get_vec_def_for_copy_stmt. */
3859 /* In case of interleaving (non-unit strided access):
3866 We create vectorized stores starting from base address (the access of the
3867 first stmt in the chain (S2 in the above example), when the last store stmt
3868 of the chain (S4) is reached:
3871 VS2: &base + vec_size*1 = vx0
3872 VS3: &base + vec_size*2 = vx1
3873 VS4: &base + vec_size*3 = vx3
3875 Then permutation statements are generated:
3877 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3878 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3881 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3882 (the order of the data-refs in the output of vect_permute_store_chain
3883 corresponds to the order of scalar stmts in the interleaving chain - see
3884 the documentation of vect_permute_store_chain()).
3886 In case of both multiple types and interleaving, above vector stores and
3887 permutation stmts are created for every copy. The result vector stmts are
3888 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3889 STMT_VINFO_RELATED_STMT for the next copies.
3892 prev_stmt_info
= NULL
;
3893 for (j
= 0; j
< ncopies
; j
++)
3902 /* Get vectorized arguments for SLP_NODE. */
3903 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
3904 NULL
, slp_node
, -1);
3906 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
3910 /* For interleaved stores we collect vectorized defs for all the
3911 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3912 used as an input to vect_permute_store_chain(), and OPRNDS as
3913 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3915 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3916 OPRNDS are of size 1. */
3917 next_stmt
= first_stmt
;
3918 for (i
= 0; i
< group_size
; i
++)
3920 /* Since gaps are not supported for interleaved stores,
3921 GROUP_SIZE is the exact number of stmts in the chain.
3922 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3923 there is no interleaving, GROUP_SIZE is 1, and only one
3924 iteration of the loop will be executed. */
3925 gcc_assert (next_stmt
3926 && gimple_assign_single_p (next_stmt
));
3927 op
= gimple_assign_rhs1 (next_stmt
);
3929 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
3931 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
3932 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
3933 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3937 /* We should have catched mismatched types earlier. */
3938 gcc_assert (useless_type_conversion_p (vectype
,
3939 TREE_TYPE (vec_oprnd
)));
3940 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
3941 NULL_TREE
, &dummy
, gsi
,
3942 &ptr_incr
, false, &inv_p
);
3943 gcc_assert (bb_vinfo
|| !inv_p
);
3947 /* For interleaved stores we created vectorized defs for all the
3948 defs stored in OPRNDS in the previous iteration (previous copy).
3949 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3950 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3952 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3953 OPRNDS are of size 1. */
3954 for (i
= 0; i
< group_size
; i
++)
3956 op
= VEC_index (tree
, oprnds
, i
);
3957 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3959 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
3960 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
3961 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
3963 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
3964 TYPE_SIZE_UNIT (aggr_type
));
3971 /* Combine all the vectors into an array. */
3972 vec_array
= create_vector_array (vectype
, vec_num
);
3973 for (i
= 0; i
< vec_num
; i
++)
3975 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
3976 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
3980 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3981 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
3982 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
3983 gimple_call_set_lhs (new_stmt
, data_ref
);
3984 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3985 mark_symbols_for_renaming (new_stmt
);
3992 result_chain
= VEC_alloc (tree
, heap
, group_size
);
3994 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
3998 next_stmt
= first_stmt
;
3999 for (i
= 0; i
< vec_num
; i
++)
4001 struct ptr_info_def
*pi
;
4004 /* Bump the vector pointer. */
4005 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4009 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4010 else if (strided_store
)
4011 /* For strided stores vectorized defs are interleaved in
4012 vect_permute_store_chain(). */
4013 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
4015 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4016 build_int_cst (reference_alias_ptr_type
4017 (DR_REF (first_dr
)), 0));
4018 pi
= get_ptr_info (dataref_ptr
);
4019 pi
->align
= TYPE_ALIGN_UNIT (vectype
);
4020 if (aligned_access_p (first_dr
))
4022 else if (DR_MISALIGNMENT (first_dr
) == -1)
4024 TREE_TYPE (data_ref
)
4025 = build_aligned_type (TREE_TYPE (data_ref
),
4026 TYPE_ALIGN (elem_type
));
4027 pi
->align
= TYPE_ALIGN_UNIT (elem_type
);
4032 TREE_TYPE (data_ref
)
4033 = build_aligned_type (TREE_TYPE (data_ref
),
4034 TYPE_ALIGN (elem_type
));
4035 pi
->misalign
= DR_MISALIGNMENT (first_dr
);
4038 /* Arguments are ready. Create the new vector stmt. */
4039 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4040 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4041 mark_symbols_for_renaming (new_stmt
);
4046 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4054 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4056 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4057 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4061 VEC_free (tree
, heap
, dr_chain
);
4062 VEC_free (tree
, heap
, oprnds
);
4064 VEC_free (tree
, heap
, result_chain
);
4066 VEC_free (tree
, heap
, vec_oprnds
);
4071 /* Given a vector type VECTYPE and permutation SEL returns
4072 the VECTOR_CST mask that implements the permutation of the
4073 vector elements. If that is impossible to do, returns NULL. */
/* Build the VECTOR_CST permutation mask selecting element SEL[i] for lane i
   of VECTYPE; the mask element type is an unsigned integer of the same size
   as VECTYPE's element.  Returns NULL_TREE if the target cannot perform the
   permutation (can_vec_perm_p).
   NOTE(review): extraction is lossy here (original lines 4084-4086, 4094 and
   the final return are missing) — confirm against upstream tree-vect-stmts.c.  */
4076 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4078 tree mask_elt_type
, mask_type
, mask_vec
;
4081 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Bail out if the target has no instruction for this permutation.  */
4083 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
/* Mask element: unsigned integer type the same width as the data element.  */
4087 = lang_hooks
.types
.type_for_size
4088 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype
))), 1);
4089 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Build the TREE_LIST of mask elements back-to-front (tree_cons prepends),
   then turn it into a VECTOR_CST.  */
4092 for (i
= nunits
- 1; i
>= 0; i
--)
4093 mask_vec
= tree_cons (NULL
, build_int_cst (mask_elt_type
, sel
[i
]),
4095 mask_vec
= build_vector (mask_type
, mask_vec
);
4100 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4101 reversal of the vector elements. If that is impossible to do,
/* Return the VECTOR_CST mask that reverses the lanes of VECTYPE
   (sel[i] = nunits - 1 - i), or NULL if the target cannot do it —
   delegates the target check and mask construction to vect_gen_perm_mask.  */
4105 perm_mask_for_reverse (tree vectype
)
4110 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Stack-allocated selector; filled with the reversing permutation.  */
4111 sel
= XALLOCAVEC (unsigned char, nunits
);
4113 for (i
= 0; i
< nunits
; ++i
)
4114 sel
[i
] = nunits
- 1 - i
;
4116 return vect_gen_perm_mask (vectype
, sel
);
4119 /* Given a vector variable X and Y, that was generated for the scalar
4120 STMT, generate instructions to permute the vector elements of X and Y
4121 using permutation mask MASK_VEC, insert them at *GSI and return the
4122 permuted vector variable. */
/* Emit a VEC_PERM_EXPR permuting vectors X and Y with MASK_VEC at *GSI,
   as part of vectorizing scalar STMT, and return the new SSA destination.
   NOTE(review): lossy extraction — the operand list of the assign at
   original line 4136/4137 and the trailing return are missing; verify
   against upstream tree-vect-stmts.c.  */
4125 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4126 gimple_stmt_iterator
*gsi
)
4128 tree vectype
= TREE_TYPE (x
);
4129 tree perm_dest
, data_ref
;
/* Destination variable is derived from STMT's scalar LHS name.  */
4132 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4133 data_ref
= make_ssa_name (perm_dest
, NULL
);
4135 /* Generate the permute statement. */
4136 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, data_ref
,
4138 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4143 /* vectorizable_load.
4145 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4147 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4148 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4149 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4152 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4153 slp_tree slp_node
, slp_instance slp_node_instance
)
4156 tree vec_dest
= NULL
;
4157 tree data_ref
= NULL
;
4158 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4159 stmt_vec_info prev_stmt_info
;
4160 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4161 struct loop
*loop
= NULL
;
4162 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4163 bool nested_in_vect_loop
= false;
4164 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
4165 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4168 enum machine_mode mode
;
4169 gimple new_stmt
= NULL
;
4171 enum dr_alignment_support alignment_support_scheme
;
4172 tree dataref_ptr
= NULL_TREE
;
4174 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4176 int i
, j
, group_size
;
4177 tree msq
= NULL_TREE
, lsq
;
4178 tree offset
= NULL_TREE
;
4179 tree realignment_token
= NULL_TREE
;
4181 VEC(tree
,heap
) *dr_chain
= NULL
;
4182 bool strided_load
= false;
4183 bool load_lanes_p
= false;
4187 bool compute_in_loop
= false;
4188 struct loop
*at_loop
;
4190 bool slp
= (slp_node
!= NULL
);
4191 bool slp_perm
= false;
4192 enum tree_code code
;
4193 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4196 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4197 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4198 int gather_scale
= 1;
4199 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4203 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4204 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4205 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4210 /* Multiple types in SLP are handled by creating the appropriate number of
4211 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4213 if (slp
|| PURE_SLP_STMT (stmt_info
))
4216 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4218 gcc_assert (ncopies
>= 1);
4220 /* FORNOW. This restriction should be relaxed. */
4221 if (nested_in_vect_loop
&& ncopies
> 1)
4223 if (vect_print_dump_info (REPORT_DETAILS
))
4224 fprintf (vect_dump
, "multiple types in nested loop.");
4228 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4231 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4234 /* Is vectorizable load? */
4235 if (!is_gimple_assign (stmt
))
4238 scalar_dest
= gimple_assign_lhs (stmt
);
4239 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4242 code
= gimple_assign_rhs_code (stmt
);
4243 if (code
!= ARRAY_REF
4244 && code
!= INDIRECT_REF
4245 && code
!= COMPONENT_REF
4246 && code
!= IMAGPART_EXPR
4247 && code
!= REALPART_EXPR
4249 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4252 if (!STMT_VINFO_DATA_REF (stmt_info
))
4255 negative
= tree_int_cst_compare (nested_in_vect_loop
4256 ? STMT_VINFO_DR_STEP (stmt_info
)
4258 size_zero_node
) < 0;
4259 if (negative
&& ncopies
> 1)
4261 if (vect_print_dump_info (REPORT_DETAILS
))
4262 fprintf (vect_dump
, "multiple types with negative step.");
4266 elem_type
= TREE_TYPE (vectype
);
4267 mode
= TYPE_MODE (vectype
);
4269 /* FORNOW. In some cases can vectorize even if data-type not supported
4270 (e.g. - data copies). */
4271 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4273 if (vect_print_dump_info (REPORT_DETAILS
))
4274 fprintf (vect_dump
, "Aligned load, but unsupported type.");
4278 /* Check if the load is a part of an interleaving chain. */
4279 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
4281 strided_load
= true;
4283 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4285 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4286 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4288 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4289 if (vect_load_lanes_supported (vectype
, group_size
))
4290 load_lanes_p
= true;
4291 else if (!vect_strided_load_supported (vectype
, group_size
))
4298 gcc_assert (!strided_load
&& !STMT_VINFO_GATHER_P (stmt_info
));
4299 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4300 if (alignment_support_scheme
!= dr_aligned
4301 && alignment_support_scheme
!= dr_unaligned_supported
)
4303 if (vect_print_dump_info (REPORT_DETAILS
))
4304 fprintf (vect_dump
, "negative step but alignment required.");
4307 if (!perm_mask_for_reverse (vectype
))
4309 if (vect_print_dump_info (REPORT_DETAILS
))
4310 fprintf (vect_dump
, "negative step and reversing not supported.");
4315 if (STMT_VINFO_GATHER_P (stmt_info
))
4319 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4320 &gather_off
, &gather_scale
);
4321 gcc_assert (gather_decl
);
4322 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4323 &def_stmt
, &def
, &gather_dt
,
4324 &gather_off_vectype
))
4326 if (vect_print_dump_info (REPORT_DETAILS
))
4327 fprintf (vect_dump
, "gather index use not simple.");
4332 if (!vec_stmt
) /* transformation not required. */
4334 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4335 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
);
4339 if (vect_print_dump_info (REPORT_DETAILS
))
4340 fprintf (vect_dump
, "transform load. ncopies = %d", ncopies
);
4344 if (STMT_VINFO_GATHER_P (stmt_info
))
4346 tree vec_oprnd0
= NULL_TREE
, op
;
4347 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4348 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4349 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4350 edge pe
= loop_preheader_edge (loop
);
4353 enum { NARROW
, NONE
, WIDEN
} modifier
;
4354 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4356 if (nunits
== gather_off_nunits
)
4358 else if (nunits
== gather_off_nunits
/ 2)
4360 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4363 for (i
= 0; i
< gather_off_nunits
; ++i
)
4364 sel
[i
] = i
| nunits
;
4366 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4367 gcc_assert (perm_mask
!= NULL_TREE
);
4369 else if (nunits
== gather_off_nunits
* 2)
4371 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4374 for (i
= 0; i
< nunits
; ++i
)
4375 sel
[i
] = i
< gather_off_nunits
4376 ? i
: i
+ nunits
- gather_off_nunits
;
4378 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4379 gcc_assert (perm_mask
!= NULL_TREE
);
4385 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4386 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4387 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4388 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4389 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4390 scaletype
= TREE_VALUE (arglist
);
4391 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4392 && types_compatible_p (srctype
, masktype
));
4394 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4396 ptr
= fold_convert (ptrtype
, gather_base
);
4397 if (!is_gimple_min_invariant (ptr
))
4399 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4400 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4401 gcc_assert (!new_bb
);
4404 /* Currently we support only unconditional gather loads,
4405 so mask should be all ones. */
4406 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4407 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4408 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4412 for (j
= 0; j
< 6; ++j
)
4414 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4415 mask
= build_real (TREE_TYPE (masktype
), r
);
4419 mask
= build_vector_from_val (masktype
, mask
);
4420 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4422 scale
= build_int_cst (scaletype
, gather_scale
);
4424 prev_stmt_info
= NULL
;
4425 for (j
= 0; j
< ncopies
; ++j
)
4427 if (modifier
== WIDEN
&& (j
& 1))
4428 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4429 perm_mask
, stmt
, gsi
);
4432 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4435 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4437 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4439 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4440 == TYPE_VECTOR_SUBPARTS (idxtype
));
4441 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4442 add_referenced_var (var
);
4443 var
= make_ssa_name (var
, NULL
);
4444 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4446 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4448 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4453 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4455 if (!useless_type_conversion_p (vectype
, rettype
))
4457 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4458 == TYPE_VECTOR_SUBPARTS (rettype
));
4459 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4460 add_referenced_var (var
);
4461 op
= make_ssa_name (var
, new_stmt
);
4462 gimple_call_set_lhs (new_stmt
, op
);
4463 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4464 var
= make_ssa_name (vec_dest
, NULL
);
4465 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4467 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4472 var
= make_ssa_name (vec_dest
, new_stmt
);
4473 gimple_call_set_lhs (new_stmt
, var
);
4476 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4478 if (modifier
== NARROW
)
4485 var
= permute_vec_elements (prev_res
, var
,
4486 perm_mask
, stmt
, gsi
);
4487 new_stmt
= SSA_NAME_DEF_STMT (var
);
4490 if (prev_stmt_info
== NULL
)
4491 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4493 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4494 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4501 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4503 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
)
4504 && first_stmt
!= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0))
4505 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
4507 /* Check if the chain of loads is already vectorized. */
4508 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4510 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4513 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4514 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4516 /* VEC_NUM is the number of vect stmts to be created for this group. */
4519 strided_load
= false;
4520 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4521 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
4525 vec_num
= group_size
;
4531 group_size
= vec_num
= 1;
4534 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4535 gcc_assert (alignment_support_scheme
);
4536 /* Targets with load-lane instructions must not require explicit
4538 gcc_assert (!load_lanes_p
4539 || alignment_support_scheme
== dr_aligned
4540 || alignment_support_scheme
== dr_unaligned_supported
);
4542 /* In case the vectorization factor (VF) is bigger than the number
4543 of elements that we can fit in a vectype (nunits), we have to generate
4544 more than one vector stmt - i.e - we need to "unroll" the
4545 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4546 from one copy of the vector stmt to the next, in the field
4547 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4548 stages to find the correct vector defs to be used when vectorizing
4549 stmts that use the defs of the current stmt. The example below
4550 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4551 need to create 4 vectorized stmts):
4553 before vectorization:
4554 RELATED_STMT VEC_STMT
4558 step 1: vectorize stmt S1:
4559 We first create the vector stmt VS1_0, and, as usual, record a
4560 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4561 Next, we create the vector stmt VS1_1, and record a pointer to
4562 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4563 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4565 RELATED_STMT VEC_STMT
4566 VS1_0: vx0 = memref0 VS1_1 -
4567 VS1_1: vx1 = memref1 VS1_2 -
4568 VS1_2: vx2 = memref2 VS1_3 -
4569 VS1_3: vx3 = memref3 - -
4570 S1: x = load - VS1_0
4573 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4574 information we recorded in RELATED_STMT field is used to vectorize
4577 /* In case of interleaving (non-unit strided access):
4584 Vectorized loads are created in the order of memory accesses
4585 starting from the access of the first stmt of the chain:
4588 VS2: vx1 = &base + vec_size*1
4589 VS3: vx3 = &base + vec_size*2
4590 VS4: vx4 = &base + vec_size*3
4592 Then permutation statements are generated:
4594 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4595 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4598 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4599 (the order of the data-refs in the output of vect_permute_load_chain
4600 corresponds to the order of scalar stmts in the interleaving chain - see
4601 the documentation of vect_permute_load_chain()).
4602 The generation of permutation stmts and recording them in
4603 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4605 In case of both multiple types and interleaving, the vector loads and
4606 permutation stmts above are created for every copy. The result vector
4607 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4608 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4610 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4611 on a target that supports unaligned accesses (dr_unaligned_supported)
4612 we generate the following code:
4616 p = p + indx * vectype_size;
4621 Otherwise, the data reference is potentially unaligned on a target that
4622 does not support unaligned accesses (dr_explicit_realign_optimized) -
4623 then generate the following code, in which the data in each iteration is
4624 obtained by two vector loads, one from the previous iteration, and one
4625 from the current iteration:
4627 msq_init = *(floor(p1))
4628 p2 = initial_addr + VS - 1;
4629 realignment_token = call target_builtin;
4632 p2 = p2 + indx * vectype_size
4634 vec_dest = realign_load (msq, lsq, realignment_token)
4639 /* If the misalignment remains the same throughout the execution of the
4640 loop, we can create the init_addr and permutation mask at the loop
4641 preheader. Otherwise, it needs to be created inside the loop.
4642 This can only occur when vectorizing memory accesses in the inner-loop
4643 nested within an outer-loop that is being vectorized. */
4645 if (nested_in_vect_loop
4646 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4647 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4649 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4650 compute_in_loop
= true;
4653 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4654 || alignment_support_scheme
== dr_explicit_realign
)
4655 && !compute_in_loop
)
4657 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4658 alignment_support_scheme
, NULL_TREE
,
4660 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4662 phi
= SSA_NAME_DEF_STMT (msq
);
4663 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4670 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4673 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4675 aggr_type
= vectype
;
4677 prev_stmt_info
= NULL
;
4678 for (j
= 0; j
< ncopies
; j
++)
4680 /* 1. Create the vector or array pointer update chain. */
4682 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4683 offset
, &dummy
, gsi
,
4684 &ptr_incr
, false, &inv_p
);
4686 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4687 TYPE_SIZE_UNIT (aggr_type
));
4689 if (strided_load
|| slp_perm
)
4690 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
4696 vec_array
= create_vector_array (vectype
, vec_num
);
4699 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4700 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4701 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4702 gimple_call_set_lhs (new_stmt
, vec_array
);
4703 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4704 mark_symbols_for_renaming (new_stmt
);
4706 /* Extract each vector into an SSA_NAME. */
4707 for (i
= 0; i
< vec_num
; i
++)
4709 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4711 VEC_quick_push (tree
, dr_chain
, new_temp
);
4714 /* Record the mapping between SSA_NAMEs and statements. */
4715 vect_record_strided_load_vectors (stmt
, dr_chain
);
4719 for (i
= 0; i
< vec_num
; i
++)
4722 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4725 /* 2. Create the vector-load in the loop. */
4726 switch (alignment_support_scheme
)
4729 case dr_unaligned_supported
:
4731 struct ptr_info_def
*pi
;
4733 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4734 build_int_cst (reference_alias_ptr_type
4735 (DR_REF (first_dr
)), 0));
4736 pi
= get_ptr_info (dataref_ptr
);
4737 pi
->align
= TYPE_ALIGN_UNIT (vectype
);
4738 if (alignment_support_scheme
== dr_aligned
)
4740 gcc_assert (aligned_access_p (first_dr
));
4743 else if (DR_MISALIGNMENT (first_dr
) == -1)
4745 TREE_TYPE (data_ref
)
4746 = build_aligned_type (TREE_TYPE (data_ref
),
4747 TYPE_ALIGN (elem_type
));
4748 pi
->align
= TYPE_ALIGN_UNIT (elem_type
);
4753 TREE_TYPE (data_ref
)
4754 = build_aligned_type (TREE_TYPE (data_ref
),
4755 TYPE_ALIGN (elem_type
));
4756 pi
->misalign
= DR_MISALIGNMENT (first_dr
);
4760 case dr_explicit_realign
:
4765 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4767 if (compute_in_loop
)
4768 msq
= vect_setup_realignment (first_stmt
, gsi
,
4770 dr_explicit_realign
,
4773 new_stmt
= gimple_build_assign_with_ops
4774 (BIT_AND_EXPR
, NULL_TREE
, dataref_ptr
,
4776 (TREE_TYPE (dataref_ptr
),
4777 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4778 ptr
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
), new_stmt
);
4779 gimple_assign_set_lhs (new_stmt
, ptr
);
4780 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4782 = build2 (MEM_REF
, vectype
, ptr
,
4783 build_int_cst (reference_alias_ptr_type
4784 (DR_REF (first_dr
)), 0));
4785 vec_dest
= vect_create_destination_var (scalar_dest
,
4787 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4788 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4789 gimple_assign_set_lhs (new_stmt
, new_temp
);
4790 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
4791 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
4792 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4795 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
4796 TYPE_SIZE_UNIT (elem_type
));
4797 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
4798 new_stmt
= gimple_build_assign_with_ops
4799 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
4802 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4803 ptr
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
), new_stmt
);
4804 gimple_assign_set_lhs (new_stmt
, ptr
);
4805 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4807 = build2 (MEM_REF
, vectype
, ptr
,
4808 build_int_cst (reference_alias_ptr_type
4809 (DR_REF (first_dr
)), 0));
4812 case dr_explicit_realign_optimized
:
4813 new_stmt
= gimple_build_assign_with_ops
4814 (BIT_AND_EXPR
, NULL_TREE
, dataref_ptr
,
4816 (TREE_TYPE (dataref_ptr
),
4817 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4818 new_temp
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
),
4820 gimple_assign_set_lhs (new_stmt
, new_temp
);
4821 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4823 = build2 (MEM_REF
, vectype
, new_temp
,
4824 build_int_cst (reference_alias_ptr_type
4825 (DR_REF (first_dr
)), 0));
4830 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4831 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4832 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4833 gimple_assign_set_lhs (new_stmt
, new_temp
);
4834 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4835 mark_symbols_for_renaming (new_stmt
);
4837 /* 3. Handle explicit realignment if necessary/supported.
4839 vec_dest = realign_load (msq, lsq, realignment_token) */
4840 if (alignment_support_scheme
== dr_explicit_realign_optimized
4841 || alignment_support_scheme
== dr_explicit_realign
)
4843 lsq
= gimple_assign_lhs (new_stmt
);
4844 if (!realignment_token
)
4845 realignment_token
= dataref_ptr
;
4846 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4848 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR
,
4851 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4852 gimple_assign_set_lhs (new_stmt
, new_temp
);
4853 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4855 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4858 if (i
== vec_num
- 1 && j
== ncopies
- 1)
4859 add_phi_arg (phi
, lsq
,
4860 loop_latch_edge (containing_loop
),
4866 /* 4. Handle invariant-load. */
4867 if (inv_p
&& !bb_vinfo
)
4870 gimple_stmt_iterator gsi2
= *gsi
;
4871 gcc_assert (!strided_load
);
4874 if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4877 tem
= fold_convert (TREE_TYPE (vectype
), tem
);
4878 tem
= force_gimple_operand_gsi (&gsi2
, tem
, true,
4882 vec_inv
= build_vector_from_val (vectype
, tem
);
4883 new_temp
= vect_init_vector (stmt
, vec_inv
,
4885 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4890 tree perm_mask
= perm_mask_for_reverse (vectype
);
4891 new_temp
= permute_vec_elements (new_temp
, new_temp
,
4892 perm_mask
, stmt
, gsi
);
4893 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4896 /* Collect vector loads and later create their permutation in
4897 vect_transform_strided_load (). */
4898 if (strided_load
|| slp_perm
)
4899 VEC_quick_push (tree
, dr_chain
, new_temp
);
4901 /* Store vector loads in the corresponding SLP_NODE. */
4902 if (slp
&& !slp_perm
)
4903 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
4908 if (slp
&& !slp_perm
)
4913 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
4914 slp_node_instance
, false))
4916 VEC_free (tree
, heap
, dr_chain
);
4925 vect_transform_strided_load (stmt
, dr_chain
, group_size
, gsi
);
4926 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4931 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4933 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4934 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4938 VEC_free (tree
, heap
, dr_chain
);
4944 /* Function vect_is_simple_cond.
4947 LOOP - the loop that is being vectorized.
4948 COND - Condition that is checked for simple use.
4951 *COMP_VECTYPE - the vector type for the comparison.
4953 Returns whether a COND can be vectorized. Checks whether
4954 condition operands are supportable using vec_is_simple_use. */
4957 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
4958 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
4962 enum vect_def_type dt
;
4963 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
4965 if (!COMPARISON_CLASS_P (cond
))
4968 lhs
= TREE_OPERAND (cond
, 0);
4969 rhs
= TREE_OPERAND (cond
, 1);
4971 if (TREE_CODE (lhs
) == SSA_NAME
)
4973 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
4974 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
4975 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
4978 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
4979 && TREE_CODE (lhs
) != FIXED_CST
)
4982 if (TREE_CODE (rhs
) == SSA_NAME
)
4984 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
4985 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
4986 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
4989 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
4990 && TREE_CODE (rhs
) != FIXED_CST
)
4993 *comp_vectype
= vectype1
? vectype1
: vectype2
;
4997 /* vectorizable_condition.
4999 Check if STMT is conditional modify expression that can be vectorized.
5000 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5001 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5004 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5005 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5006 else caluse if it is 2).
5008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5011 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5012 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5015 tree scalar_dest
= NULL_TREE
;
5016 tree vec_dest
= NULL_TREE
;
5017 tree cond_expr
, then_clause
, else_clause
;
5018 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5019 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5020 tree comp_vectype
= NULL_TREE
;
5021 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5022 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5023 tree vec_compare
, vec_cond_expr
;
5025 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5027 enum vect_def_type dt
, dts
[4];
5028 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5030 enum tree_code code
;
5031 stmt_vec_info prev_stmt_info
= NULL
;
5033 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5034 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
5035 VEC (tree
, heap
) *vec_oprnds2
= NULL
, *vec_oprnds3
= NULL
;
5037 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5040 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5042 gcc_assert (ncopies
>= 1);
5043 if (reduc_index
&& ncopies
> 1)
5044 return false; /* FORNOW */
5046 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5049 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5052 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5053 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5057 /* FORNOW: not yet supported. */
5058 if (STMT_VINFO_LIVE_P (stmt_info
))
5060 if (vect_print_dump_info (REPORT_DETAILS
))
5061 fprintf (vect_dump
, "value used after loop.");
5065 /* Is vectorizable conditional operation? */
5066 if (!is_gimple_assign (stmt
))
5069 code
= gimple_assign_rhs_code (stmt
);
5071 if (code
!= COND_EXPR
)
5074 cond_expr
= gimple_assign_rhs1 (stmt
);
5075 then_clause
= gimple_assign_rhs2 (stmt
);
5076 else_clause
= gimple_assign_rhs3 (stmt
);
5078 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5083 if (TREE_CODE (then_clause
) == SSA_NAME
)
5085 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5086 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5087 &then_def_stmt
, &def
, &dt
))
5090 else if (TREE_CODE (then_clause
) != INTEGER_CST
5091 && TREE_CODE (then_clause
) != REAL_CST
5092 && TREE_CODE (then_clause
) != FIXED_CST
)
5095 if (TREE_CODE (else_clause
) == SSA_NAME
)
5097 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5098 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5099 &else_def_stmt
, &def
, &dt
))
5102 else if (TREE_CODE (else_clause
) != INTEGER_CST
5103 && TREE_CODE (else_clause
) != REAL_CST
5104 && TREE_CODE (else_clause
) != FIXED_CST
)
5109 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5110 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5117 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
5118 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
5119 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
5120 vec_oprnds3
= VEC_alloc (tree
, heap
, 1);
5124 scalar_dest
= gimple_assign_lhs (stmt
);
5125 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5127 /* Handle cond expr. */
5128 for (j
= 0; j
< ncopies
; j
++)
5130 gimple new_stmt
= NULL
;
5135 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, 4);
5136 VEC (slp_void_p
, heap
) *vec_defs
;
5138 vec_defs
= VEC_alloc (slp_void_p
, heap
, 4);
5139 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 0));
5140 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 1));
5141 VEC_safe_push (tree
, heap
, ops
, then_clause
);
5142 VEC_safe_push (tree
, heap
, ops
, else_clause
);
5143 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5144 vec_oprnds3
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5145 vec_oprnds2
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5146 vec_oprnds1
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5147 vec_oprnds0
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5149 VEC_free (tree
, heap
, ops
);
5150 VEC_free (slp_void_p
, heap
, vec_defs
);
5156 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5158 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5159 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5162 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5164 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5165 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5166 if (reduc_index
== 1)
5167 vec_then_clause
= reduc_def
;
5170 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5172 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5173 NULL
, >emp
, &def
, &dts
[2]);
5175 if (reduc_index
== 2)
5176 vec_else_clause
= reduc_def
;
5179 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5181 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5182 NULL
, >emp
, &def
, &dts
[3]);
5188 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5189 VEC_pop (tree
, vec_oprnds0
));
5190 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5191 VEC_pop (tree
, vec_oprnds1
));
5192 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5193 VEC_pop (tree
, vec_oprnds2
));
5194 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5195 VEC_pop (tree
, vec_oprnds3
));
5200 VEC_quick_push (tree
, vec_oprnds0
, vec_cond_lhs
);
5201 VEC_quick_push (tree
, vec_oprnds1
, vec_cond_rhs
);
5202 VEC_quick_push (tree
, vec_oprnds2
, vec_then_clause
);
5203 VEC_quick_push (tree
, vec_oprnds3
, vec_else_clause
);
5206 /* Arguments are ready. Create the new vector stmt. */
5207 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_cond_lhs
)
5209 vec_cond_rhs
= VEC_index (tree
, vec_oprnds1
, i
);
5210 vec_then_clause
= VEC_index (tree
, vec_oprnds2
, i
);
5211 vec_else_clause
= VEC_index (tree
, vec_oprnds3
, i
);
5213 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
5214 vec_cond_lhs
, vec_cond_rhs
);
5215 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5216 vec_compare
, vec_then_clause
, vec_else_clause
);
5218 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5219 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5220 gimple_assign_set_lhs (new_stmt
, new_temp
);
5221 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5223 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
5230 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5232 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5234 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5237 VEC_free (tree
, heap
, vec_oprnds0
);
5238 VEC_free (tree
, heap
, vec_oprnds1
);
5239 VEC_free (tree
, heap
, vec_oprnds2
);
5240 VEC_free (tree
, heap
, vec_oprnds3
);
5246 /* Make sure the statement is vectorizable. */
5249 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5251 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5252 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5253 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5255 tree scalar_type
, vectype
;
5256 gimple pattern_stmt
;
5257 gimple_seq pattern_def_seq
;
5259 if (vect_print_dump_info (REPORT_DETAILS
))
5261 fprintf (vect_dump
, "==> examining statement: ");
5262 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5265 if (gimple_has_volatile_ops (stmt
))
5267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5268 fprintf (vect_dump
, "not vectorized: stmt has volatile operands");
5273 /* Skip stmts that do not need to be vectorized. In loops this is expected
5275 - the COND_EXPR which is the loop exit condition
5276 - any LABEL_EXPRs in the loop
5277 - computations that are used only for array indexing or loop control.
5278 In basic blocks we only analyze statements that are a part of some SLP
5279 instance, therefore, all the statements are relevant.
5281 Pattern statement needs to be analyzed instead of the original statement
5282 if the original statement is not relevant. Otherwise, we analyze both
5285 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5286 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5287 && !STMT_VINFO_LIVE_P (stmt_info
))
5289 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5291 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5292 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5294 /* Analyze PATTERN_STMT instead of the original stmt. */
5295 stmt
= pattern_stmt
;
5296 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5297 if (vect_print_dump_info (REPORT_DETAILS
))
5299 fprintf (vect_dump
, "==> examining pattern statement: ");
5300 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5305 if (vect_print_dump_info (REPORT_DETAILS
))
5306 fprintf (vect_dump
, "irrelevant.");
5311 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5313 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5314 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5316 /* Analyze PATTERN_STMT too. */
5317 if (vect_print_dump_info (REPORT_DETAILS
))
5319 fprintf (vect_dump
, "==> examining pattern statement: ");
5320 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5323 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5327 if (is_pattern_stmt_p (stmt_info
)
5328 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5330 gimple_stmt_iterator si
;
5332 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5334 gimple pattern_def_stmt
= gsi_stmt (si
);
5335 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5336 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5338 /* Analyze def stmt of STMT if it's a pattern stmt. */
5339 if (vect_print_dump_info (REPORT_DETAILS
))
5341 fprintf (vect_dump
, "==> examining pattern def statement: ");
5342 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0, TDF_SLIM
);
5345 if (!vect_analyze_stmt (pattern_def_stmt
,
5346 need_to_vectorize
, node
))
5352 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5354 case vect_internal_def
:
5357 case vect_reduction_def
:
5358 case vect_nested_cycle
:
5359 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5360 || relevance
== vect_used_in_outer_by_reduction
5361 || relevance
== vect_unused_in_scope
));
5364 case vect_induction_def
:
5365 case vect_constant_def
:
5366 case vect_external_def
:
5367 case vect_unknown_def_type
:
5374 gcc_assert (PURE_SLP_STMT (stmt_info
));
5376 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5377 if (vect_print_dump_info (REPORT_DETAILS
))
5379 fprintf (vect_dump
, "get vectype for scalar type: ");
5380 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5383 vectype
= get_vectype_for_scalar_type (scalar_type
);
5386 if (vect_print_dump_info (REPORT_DETAILS
))
5388 fprintf (vect_dump
, "not SLPed: unsupported data-type ");
5389 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5394 if (vect_print_dump_info (REPORT_DETAILS
))
5396 fprintf (vect_dump
, "vectype: ");
5397 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5400 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5403 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5405 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5406 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5407 *need_to_vectorize
= true;
5412 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5413 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5414 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5415 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5416 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5417 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5418 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5419 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5420 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5421 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5422 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5426 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5427 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5428 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5429 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5430 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5431 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5432 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5433 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5438 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5440 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
5441 fprintf (vect_dump
, "supported: ");
5442 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5451 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5452 need extra handling, except for vectorizable reductions. */
5453 if (STMT_VINFO_LIVE_P (stmt_info
)
5454 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5455 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5461 fprintf (vect_dump
, "not vectorized: live stmt not ");
5462 fprintf (vect_dump
, "supported: ");
5463 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5473 /* Function vect_transform_stmt.
5475 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5478 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5479 bool *strided_store
, slp_tree slp_node
,
5480 slp_instance slp_node_instance
)
5482 bool is_store
= false;
5483 gimple vec_stmt
= NULL
;
5484 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5487 switch (STMT_VINFO_TYPE (stmt_info
))
5489 case type_demotion_vec_info_type
:
5490 case type_promotion_vec_info_type
:
5491 case type_conversion_vec_info_type
:
5492 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5496 case induc_vec_info_type
:
5497 gcc_assert (!slp_node
);
5498 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5502 case shift_vec_info_type
:
5503 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5507 case op_vec_info_type
:
5508 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5512 case assignment_vec_info_type
:
5513 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5517 case load_vec_info_type
:
5518 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5523 case store_vec_info_type
:
5524 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5526 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
) && !slp_node
)
5528 /* In case of interleaving, the whole chain is vectorized when the
5529 last store in the chain is reached. Store stmts before the last
5530 one are skipped, and there vec_stmt_info shouldn't be freed
5532 *strided_store
= true;
5533 if (STMT_VINFO_VEC_STMT (stmt_info
))
5540 case condition_vec_info_type
:
5541 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5545 case call_vec_info_type
:
5546 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5547 stmt
= gsi_stmt (*gsi
);
5550 case reduc_vec_info_type
:
5551 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5556 if (!STMT_VINFO_LIVE_P (stmt_info
))
5558 if (vect_print_dump_info (REPORT_DETAILS
))
5559 fprintf (vect_dump
, "stmt not supported.");
5564 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5565 is being vectorized, but outside the immediately enclosing loop. */
5567 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5568 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5569 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5570 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5571 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5572 || STMT_VINFO_RELEVANT (stmt_info
) ==
5573 vect_used_in_outer_by_reduction
))
5575 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5576 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5577 imm_use_iterator imm_iter
;
5578 use_operand_p use_p
;
5582 if (vect_print_dump_info (REPORT_DETAILS
))
5583 fprintf (vect_dump
, "Record the vdef for outer-loop vectorization.");
5585 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
5586 (to be used when vectorizing outer-loop stmts that use the DEF of
5588 if (gimple_code (stmt
) == GIMPLE_PHI
)
5589 scalar_dest
= PHI_RESULT (stmt
);
5591 scalar_dest
= gimple_assign_lhs (stmt
);
5593 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5595 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5597 exit_phi
= USE_STMT (use_p
);
5598 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5603 /* Handle stmts whose DEF is used outside the loop-nest that is
5604 being vectorized. */
5605 if (STMT_VINFO_LIVE_P (stmt_info
)
5606 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5608 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5613 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
5619 /* Remove a group of stores (for SLP or interleaving), free their
5623 vect_remove_stores (gimple first_stmt
)
5625 gimple next
= first_stmt
;
5627 gimple_stmt_iterator next_si
;
5631 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
5633 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
5634 if (is_pattern_stmt_p (stmt_info
))
5635 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5636 /* Free the attached stmt_vec_info and remove the stmt. */
5637 next_si
= gsi_for_stmt (next
);
5638 gsi_remove (&next_si
, true);
5639 free_stmt_vec_info (next
);
5645 /* Function new_stmt_vec_info.
5647 Create and initialize a new stmt_vec_info struct for STMT. */
5650 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5651 bb_vec_info bb_vinfo
)
5654 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
5656 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5657 STMT_VINFO_STMT (res
) = stmt
;
5658 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5659 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5660 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5661 STMT_VINFO_LIVE_P (res
) = false;
5662 STMT_VINFO_VECTYPE (res
) = NULL
;
5663 STMT_VINFO_VEC_STMT (res
) = NULL
;
5664 STMT_VINFO_VECTORIZABLE (res
) = true;
5665 STMT_VINFO_IN_PATTERN_P (res
) = false;
5666 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5667 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5668 STMT_VINFO_DATA_REF (res
) = NULL
;
5670 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5671 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5672 STMT_VINFO_DR_INIT (res
) = NULL
;
5673 STMT_VINFO_DR_STEP (res
) = NULL
;
5674 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
5676 if (gimple_code (stmt
) == GIMPLE_PHI
5677 && is_loop_header_bb_p (gimple_bb (stmt
)))
5678 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5680 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
5682 STMT_VINFO_SAME_ALIGN_REFS (res
) = NULL
;
5683 STMT_VINFO_INSIDE_OF_LOOP_COST (res
) = 0;
5684 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res
) = 0;
5685 STMT_SLP_TYPE (res
) = loop_vect
;
5686 GROUP_FIRST_ELEMENT (res
) = NULL
;
5687 GROUP_NEXT_ELEMENT (res
) = NULL
;
5688 GROUP_SIZE (res
) = 0;
5689 GROUP_STORE_COUNT (res
) = 0;
5690 GROUP_GAP (res
) = 0;
5691 GROUP_SAME_DR_STMT (res
) = NULL
;
5692 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
5698 /* Create a hash table for stmt_vec_info. */
5701 init_stmt_vec_info_vec (void)
5703 gcc_assert (!stmt_vec_info_vec
);
5704 stmt_vec_info_vec
= VEC_alloc (vec_void_p
, heap
, 50);
5708 /* Free hash table for stmt_vec_info. */
5711 free_stmt_vec_info_vec (void)
5713 gcc_assert (stmt_vec_info_vec
);
5714 VEC_free (vec_void_p
, heap
, stmt_vec_info_vec
);
5718 /* Free stmt vectorization related info. */
5721 free_stmt_vec_info (gimple stmt
)
5723 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5728 /* Check if this statement has a related "pattern stmt"
5729 (introduced by the vectorizer during the pattern recognition
5730 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5732 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5734 stmt_vec_info patt_info
5735 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
5738 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
5741 gimple_stmt_iterator si
;
5742 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
5743 free_stmt_vec_info (gsi_stmt (si
));
5745 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
5749 VEC_free (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
5750 set_vinfo_for_stmt (stmt
, NULL
);
5755 /* Function get_vectype_for_scalar_type_and_size.
5757 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5761 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
5763 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
5764 enum machine_mode simd_mode
;
5765 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
5772 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
5773 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
5776 /* For vector types of elements whose mode precision doesn't
5777 match their types precision we use a element type of mode
5778 precision. The vectorization routines will have to make sure
5779 they support the proper result truncation/extension.
5780 We also make sure to build vector types with INTEGER_TYPE
5781 component type only. */
5782 if (INTEGRAL_TYPE_P (scalar_type
)
5783 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
5784 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
5785 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
5786 TYPE_UNSIGNED (scalar_type
));
5788 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5789 When the component mode passes the above test simply use a type
5790 corresponding to that mode. The theory is that any use that
5791 would cause problems with this will disable vectorization anyway. */
5792 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
5793 && !INTEGRAL_TYPE_P (scalar_type
)
5794 && !POINTER_TYPE_P (scalar_type
))
5795 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
5797 /* We can't build a vector type of elements with alignment bigger than
5799 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
5800 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
5801 TYPE_UNSIGNED (scalar_type
));
5803 /* If we felt back to using the mode fail if there was
5804 no scalar type for it. */
5805 if (scalar_type
== NULL_TREE
)
5808 /* If no size was supplied use the mode the target prefers. Otherwise
5809 lookup a vector mode of the specified size. */
5811 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
5813 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
5814 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
5818 vectype
= build_vector_type (scalar_type
, nunits
);
5819 if (vect_print_dump_info (REPORT_DETAILS
))
5821 fprintf (vect_dump
, "get vectype with %d units of type ", nunits
);
5822 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5828 if (vect_print_dump_info (REPORT_DETAILS
))
5830 fprintf (vect_dump
, "vectype: ");
5831 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5834 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
5835 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
5837 if (vect_print_dump_info (REPORT_DETAILS
))
5838 fprintf (vect_dump
, "mode not supported by target.");
/* Vectorization vector size in bytes for the current function; zero
   until the first successful call to get_vectype_for_scalar_type
   latches the size of the vector type it built.  */
unsigned int current_vector_size;
5847 /* Function get_vectype_for_scalar_type.
5849 Returns the vector type corresponding to SCALAR_TYPE as supported
5853 get_vectype_for_scalar_type (tree scalar_type
)
5856 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
5857 current_vector_size
);
5859 && current_vector_size
== 0)
5860 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
5864 /* Function get_same_sized_vectype
5866 Returns a vector type corresponding to SCALAR_TYPE of size
5867 VECTOR_TYPE if supported by the target. */
5870 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
5872 return get_vectype_for_scalar_type_and_size
5873 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
5876 /* Function vect_is_simple_use.
5879 LOOP_VINFO - the vect info of the loop that is being vectorized.
5880 BB_VINFO - the vect info of the basic block that is being vectorized.
5881 OPERAND - operand of STMT in the loop or bb.
5882 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5884 Returns whether a stmt with OPERAND can be vectorized.
5885 For loops, supportable operands are constants, loop invariants, and operands
5886 that are defined by the current iteration of the loop. Unsupportable
5887 operands are those that are defined by a previous iteration of the loop (as
5888 is the case in reduction/induction computations).
5889 For basic blocks, supportable operands are constants and bb invariants.
5890 For now, operands defined outside the basic block are not supported. */
5893 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
5894 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
5895 tree
*def
, enum vect_def_type
*dt
)
5898 stmt_vec_info stmt_vinfo
;
5899 struct loop
*loop
= NULL
;
5902 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5907 if (vect_print_dump_info (REPORT_DETAILS
))
5909 fprintf (vect_dump
, "vect_is_simple_use: operand ");
5910 print_generic_expr (vect_dump
, operand
, TDF_SLIM
);
5913 if (TREE_CODE (operand
) == INTEGER_CST
|| TREE_CODE (operand
) == REAL_CST
)
5915 *dt
= vect_constant_def
;
5919 if (is_gimple_min_invariant (operand
))
5922 *dt
= vect_external_def
;
5926 if (TREE_CODE (operand
) == PAREN_EXPR
)
5928 if (vect_print_dump_info (REPORT_DETAILS
))
5929 fprintf (vect_dump
, "non-associatable copy.");
5930 operand
= TREE_OPERAND (operand
, 0);
5933 if (TREE_CODE (operand
) != SSA_NAME
)
5935 if (vect_print_dump_info (REPORT_DETAILS
))
5936 fprintf (vect_dump
, "not ssa-name.");
5940 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
5941 if (*def_stmt
== NULL
)
5943 if (vect_print_dump_info (REPORT_DETAILS
))
5944 fprintf (vect_dump
, "no def_stmt.");
5948 if (vect_print_dump_info (REPORT_DETAILS
))
5950 fprintf (vect_dump
, "def_stmt: ");
5951 print_gimple_stmt (vect_dump
, *def_stmt
, 0, TDF_SLIM
);
5954 /* Empty stmt is expected only in case of a function argument.
5955 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
5956 if (gimple_nop_p (*def_stmt
))
5959 *dt
= vect_external_def
;
5963 bb
= gimple_bb (*def_stmt
);
5965 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
5966 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
5967 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
5968 *dt
= vect_external_def
;
5971 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
5972 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
5975 if (*dt
== vect_unknown_def_type
5977 && *dt
== vect_double_reduction_def
5978 && gimple_code (stmt
) != GIMPLE_PHI
))
5980 if (vect_print_dump_info (REPORT_DETAILS
))
5981 fprintf (vect_dump
, "Unsupported pattern.");
5985 if (vect_print_dump_info (REPORT_DETAILS
))
5986 fprintf (vect_dump
, "type of def: %d.",*dt
);
5988 switch (gimple_code (*def_stmt
))
5991 *def
= gimple_phi_result (*def_stmt
);
5995 *def
= gimple_assign_lhs (*def_stmt
);
5999 *def
= gimple_call_lhs (*def_stmt
);
6004 if (vect_print_dump_info (REPORT_DETAILS
))
6005 fprintf (vect_dump
, "unsupported defining stmt: ");
6012 /* Function vect_is_simple_use_1.
6014 Same as vect_is_simple_use_1 but also determines the vector operand
6015 type of OPERAND and stores it to *VECTYPE. If the definition of
6016 OPERAND is vect_uninitialized_def, vect_constant_def or
6017 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6018 is responsible to compute the best suited vector type for the
6022 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6023 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6024 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6026 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6030 /* Now get a vector type if the def is internal, otherwise supply
6031 NULL_TREE and leave it up to the caller to figure out a proper
6032 type for the use stmt. */
6033 if (*dt
== vect_internal_def
6034 || *dt
== vect_induction_def
6035 || *dt
== vect_reduction_def
6036 || *dt
== vect_double_reduction_def
6037 || *dt
== vect_nested_cycle
)
6039 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
6041 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6042 && !STMT_VINFO_RELEVANT (stmt_info
)
6043 && !STMT_VINFO_LIVE_P (stmt_info
))
6044 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6046 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6047 gcc_assert (*vectype
!= NULL_TREE
);
6049 else if (*dt
== vect_uninitialized_def
6050 || *dt
== vect_constant_def
6051 || *dt
== vect_external_def
)
6052 *vectype
= NULL_TREE
;
6060 /* Function supportable_widening_operation
6062 Check whether an operation represented by the code CODE is a
6063 widening operation that is supported by the target platform in
6064 vector form (i.e., when operating on arguments of type VECTYPE_IN
6065 producing a result of type VECTYPE_OUT).
6067 Widening operations we currently support are NOP (CONVERT), FLOAT
6068 and WIDEN_MULT. This function checks if these operations are supported
6069 by the target platform either directly (via vector tree-codes), or via
6073 - CODE1 and CODE2 are codes of vector operations to be used when
6074 vectorizing the operation, if available.
6075 - DECL1 and DECL2 are decls of target builtin functions to be used
6076 when vectorizing the operation, if available. In this case,
6077 CODE1 and CODE2 are CALL_EXPR.
6078 - MULTI_STEP_CVT determines the number of required intermediate steps in
6079 case of multi-step conversion (like char->short->int - in that case
6080 MULTI_STEP_CVT will be 1).
6081 - INTERM_TYPES contains the intermediate type required to perform the
6082 widening operation (short in the above example). */
6085 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6086 tree vectype_out
, tree vectype_in
,
6087 tree
*decl1
, tree
*decl2
,
6088 enum tree_code
*code1
, enum tree_code
*code2
,
6089 int *multi_step_cvt
,
6090 VEC (tree
, heap
) **interm_types
)
6092 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6093 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6094 struct loop
*vect_loop
= NULL
;
6096 enum machine_mode vec_mode
;
6097 enum insn_code icode1
, icode2
;
6098 optab optab1
, optab2
;
6099 tree vectype
= vectype_in
;
6100 tree wide_vectype
= vectype_out
;
6101 enum tree_code c1
, c2
;
6103 tree prev_type
, intermediate_type
;
6104 enum machine_mode intermediate_mode
, prev_mode
;
6105 optab optab3
, optab4
;
6107 *multi_step_cvt
= 0;
6109 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
6111 /* The result of a vectorized widening operation usually requires two vectors
6112 (because the widened results do not fit into one vector). The generated
6113 vector results would normally be expected to be generated in the same
6114 order as in the original scalar computation, i.e. if 8 results are
6115 generated in each vector iteration, they are to be organized as follows:
6116 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6118 However, in the special case that the result of the widening operation is
6119 used in a reduction computation only, the order doesn't matter (because
6120 when vectorizing a reduction we change the order of the computation).
6121 Some targets can take advantage of this and generate more efficient code.
6122 For example, targets like Altivec, that support widen_mult using a sequence
6123 of {mult_even,mult_odd} generate the following vectors:
6124 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6126 When vectorizing outer-loops, we execute the inner-loop sequentially
6127 (each vectorized inner-loop iteration contributes to VF outer-loop
6128 iterations in parallel). We therefore don't allow to change the order
6129 of the computation in the inner-loop during outer-loop vectorization. */
6132 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6133 && !nested_in_vect_loop_p (vect_loop
, stmt
))
6139 && code
== WIDEN_MULT_EXPR
6140 && targetm
.vectorize
.builtin_mul_widen_even
6141 && targetm
.vectorize
.builtin_mul_widen_even (vectype
)
6142 && targetm
.vectorize
.builtin_mul_widen_odd
6143 && targetm
.vectorize
.builtin_mul_widen_odd (vectype
))
6145 if (vect_print_dump_info (REPORT_DETAILS
))
6146 fprintf (vect_dump
, "Unordered widening operation detected.");
6148 *code1
= *code2
= CALL_EXPR
;
6149 *decl1
= targetm
.vectorize
.builtin_mul_widen_even (vectype
);
6150 *decl2
= targetm
.vectorize
.builtin_mul_widen_odd (vectype
);
6156 case WIDEN_MULT_EXPR
:
6157 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6158 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6161 case WIDEN_LSHIFT_EXPR
:
6162 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6163 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
6167 c1
= VEC_UNPACK_LO_EXPR
;
6168 c2
= VEC_UNPACK_HI_EXPR
;
6172 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6173 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6176 case FIX_TRUNC_EXPR
:
6177 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6178 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6179 computing the operation. */
6186 if (BYTES_BIG_ENDIAN
)
6188 enum tree_code ctmp
= c1
;
6193 if (code
== FIX_TRUNC_EXPR
)
6195 /* The signedness is determined from output operand. */
6196 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6197 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6201 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6202 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6205 if (!optab1
|| !optab2
)
6208 vec_mode
= TYPE_MODE (vectype
);
6209 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6210 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
6216 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6217 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6220 /* Check if it's a multi-step conversion that can be done using intermediate
6223 prev_type
= vectype
;
6224 prev_mode
= vec_mode
;
6226 if (!CONVERT_EXPR_CODE_P (code
))
6229 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6230 intermediate steps in promotion sequence. We try
6231 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6233 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6234 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6236 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6238 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6239 TYPE_UNSIGNED (prev_type
));
6240 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6241 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
6243 if (!optab3
|| !optab4
6244 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6245 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6246 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6247 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6248 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6249 == CODE_FOR_nothing
)
6250 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6251 == CODE_FOR_nothing
))
6254 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6255 (*multi_step_cvt
)++;
6257 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6258 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6261 prev_type
= intermediate_type
;
6262 prev_mode
= intermediate_mode
;
6265 VEC_free (tree
, heap
, *interm_types
);
6270 /* Function supportable_narrowing_operation
6272 Check whether an operation represented by the code CODE is a
6273 narrowing operation that is supported by the target platform in
6274 vector form (i.e., when operating on arguments of type VECTYPE_IN
6275 and producing a result of type VECTYPE_OUT).
6277 Narrowing operations we currently support are NOP (CONVERT) and
6278 FIX_TRUNC. This function checks if these operations are supported by
6279 the target platform directly via vector tree-codes.
6282 - CODE1 is the code of a vector operation to be used when
6283 vectorizing the operation, if available.
6284 - MULTI_STEP_CVT determines the number of required intermediate steps in
6285 case of multi-step conversion (like int->short->char - in that case
6286 MULTI_STEP_CVT will be 1).
6287 - INTERM_TYPES contains the intermediate type required to perform the
6288 narrowing operation (short in the above example). */
6291 supportable_narrowing_operation (enum tree_code code
,
6292 tree vectype_out
, tree vectype_in
,
6293 enum tree_code
*code1
, int *multi_step_cvt
,
6294 VEC (tree
, heap
) **interm_types
)
6296 enum machine_mode vec_mode
;
6297 enum insn_code icode1
;
6298 optab optab1
, interm_optab
;
6299 tree vectype
= vectype_in
;
6300 tree narrow_vectype
= vectype_out
;
6302 tree intermediate_type
;
6303 enum machine_mode intermediate_mode
, prev_mode
;
6307 *multi_step_cvt
= 0;
6311 c1
= VEC_PACK_TRUNC_EXPR
;
6314 case FIX_TRUNC_EXPR
:
6315 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6319 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6320 tree code and optabs used for computing the operation. */
6327 if (code
== FIX_TRUNC_EXPR
)
6328 /* The signedness is determined from output operand. */
6329 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6331 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6336 vec_mode
= TYPE_MODE (vectype
);
6337 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6342 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6345 /* Check if it's a multi-step conversion that can be done using intermediate
6347 prev_mode
= vec_mode
;
6348 if (code
== FIX_TRUNC_EXPR
)
6349 uns
= TYPE_UNSIGNED (vectype_out
);
6351 uns
= TYPE_UNSIGNED (vectype
);
6353 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6354 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6355 costly than signed. */
6356 if (code
== FIX_TRUNC_EXPR
&& uns
)
6358 enum insn_code icode2
;
6361 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6363 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6364 if (interm_optab
!= NULL
6365 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6366 && insn_data
[icode1
].operand
[0].mode
6367 == insn_data
[icode2
].operand
[0].mode
)
6370 optab1
= interm_optab
;
6375 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6376 intermediate steps in promotion sequence. We try
6377 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6378 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6379 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6381 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6383 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6385 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6388 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6389 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6390 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6391 == CODE_FOR_nothing
))
6394 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6395 (*multi_step_cvt
)++;
6397 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6400 prev_mode
= intermediate_mode
;
6401 optab1
= interm_optab
;
6404 VEC_free (tree
, heap
, *interm_types
);