1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
58 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
60 return STMT_VINFO_VECTYPE (stmt_info
);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
66 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
68 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
69 basic_block bb
= gimple_bb (stmt
);
70 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
76 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
78 return (bb
->loop_father
== loop
->inner
);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
86 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
87 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
88 int misalign
, enum vect_cost_model_location where
)
92 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
93 stmt_info_for_cost si
= { count
, kind
,
94 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
96 body_cost_vec
->safe_push (si
);
98 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
101 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
102 count
, kind
, stmt_info
, misalign
, where
);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
169 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
170 /* Arrays have the same alignment as their type. */
171 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
175 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
177 /* Function vect_mark_relevant.
179 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
183 enum vect_relevant relevant
, bool live_p
)
185 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
186 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
187 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
188 gimple
*pattern_stmt
;
190 if (dump_enabled_p ())
192 dump_printf_loc (MSG_NOTE
, vect_location
,
193 "mark relevant %d, live %d: ", relevant
, live_p
);
194 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
197 /* If this stmt is an original stmt in a pattern, we might need to mark its
198 related pattern stmt instead of the original stmt. However, such stmts
199 may have their own uses that are not in any pattern, in such cases the
200 stmt itself should be marked. */
201 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
203 /* This is the last stmt in a sequence that was detected as a
204 pattern that can potentially be vectorized. Don't mark the stmt
205 as relevant/live because it's not going to be vectorized.
206 Instead mark the pattern-stmt that replaces it. */
208 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE
, vect_location
,
212 "last stmt in pattern. don't mark"
213 " relevant/live.\n");
214 stmt_info
= vinfo_for_stmt (pattern_stmt
);
215 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
216 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
217 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
221 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
222 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
223 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
225 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
226 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
228 if (dump_enabled_p ())
229 dump_printf_loc (MSG_NOTE
, vect_location
,
230 "already marked relevant/live.\n");
234 worklist
->safe_push (stmt
);
238 /* Function vect_stmt_relevant_p.
240 Return true if STMT in loop that is represented by LOOP_VINFO is
241 "relevant for vectorization".
243 A stmt is considered "relevant for vectorization" if:
244 - it has uses outside the loop.
245 - it has vdefs (it alters memory).
246 - control stmts in the loop (except for the exit condition).
248 CHECKME: what other side effects would the vectorizer allow? */
251 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
252 enum vect_relevant
*relevant
, bool *live_p
)
254 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
256 imm_use_iterator imm_iter
;
260 *relevant
= vect_unused_in_scope
;
263 /* cond stmt other than loop exit cond. */
264 if (is_ctrl_stmt (stmt
)
265 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
266 != loop_exit_ctrl_vec_info_type
)
267 *relevant
= vect_used_in_scope
;
269 /* changing memory. */
270 if (gimple_code (stmt
) != GIMPLE_PHI
)
271 if (gimple_vdef (stmt
)
272 && !gimple_clobber_p (stmt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_NOTE
, vect_location
,
276 "vec_stmt_relevant_p: stmt has vdefs.\n");
277 *relevant
= vect_used_in_scope
;
280 /* uses outside the loop. */
281 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
283 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
285 basic_block bb
= gimple_bb (USE_STMT (use_p
));
286 if (!flow_bb_inside_loop_p (loop
, bb
))
288 if (dump_enabled_p ())
289 dump_printf_loc (MSG_NOTE
, vect_location
,
290 "vec_stmt_relevant_p: used out of loop.\n");
292 if (is_gimple_debug (USE_STMT (use_p
)))
295 /* We expect all such uses to be in the loop exit phis
296 (because of loop closed form) */
297 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
298 gcc_assert (bb
== single_exit (loop
)->dest
);
305 return (*live_p
|| *relevant
);
309 /* Function exist_non_indexing_operands_for_use_p
311 USE is one of the uses attached to STMT. Check if USE is
312 used in STMT for anything other than indexing an array. */
315 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
318 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
320 /* USE corresponds to some operand in STMT. If there is no data
321 reference in STMT, then any operand that corresponds to USE
322 is not indexing an array. */
323 if (!STMT_VINFO_DATA_REF (stmt_info
))
326 /* STMT has a data_ref. FORNOW this means that its of one of
330 (This should have been verified in analyze_data_refs).
332 'var' in the second case corresponds to a def, not a use,
333 so USE cannot correspond to any operands that are not used
336 Therefore, all we need to check is if STMT falls into the
337 first case, and whether var corresponds to USE. */
339 if (!gimple_assign_copy_p (stmt
))
341 if (is_gimple_call (stmt
)
342 && gimple_call_internal_p (stmt
))
343 switch (gimple_call_internal_fn (stmt
))
346 operand
= gimple_call_arg (stmt
, 3);
351 operand
= gimple_call_arg (stmt
, 2);
361 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
363 operand
= gimple_assign_rhs1 (stmt
);
364 if (TREE_CODE (operand
) != SSA_NAME
)
375 Function process_use.
378 - a USE in STMT in a loop represented by LOOP_VINFO
379 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
380 that defined USE. This is done by calling mark_relevant and passing it
381 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
382 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
386 Generally, LIVE_P and RELEVANT are used to define the liveness and
387 relevance info of the DEF_STMT of this USE:
388 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
389 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
391 - case 1: If USE is used only for address computations (e.g. array indexing),
392 which does not need to be directly vectorized, then the liveness/relevance
393 of the respective DEF_STMT is left unchanged.
394 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
395 skip DEF_STMT cause it had already been processed.
396 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
397 be modified accordingly.
399 Return true if everything is as expected. Return false otherwise. */
402 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
403 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
406 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
407 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
408 stmt_vec_info dstmt_vinfo
;
409 basic_block bb
, def_bb
;
411 enum vect_def_type dt
;
413 /* case 1: we are only interested in uses that need to be vectorized. Uses
414 that are used for address computation are not considered relevant. */
415 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
418 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
422 "not vectorized: unsupported use in stmt.\n");
426 if (!def_stmt
|| gimple_nop_p (def_stmt
))
429 def_bb
= gimple_bb (def_stmt
);
430 if (!flow_bb_inside_loop_p (loop
, def_bb
))
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
437 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
438 DEF_STMT must have already been processed, because this should be the
439 only way that STMT, which is a reduction-phi, was put in the worklist,
440 as there should be no other uses for DEF_STMT in the loop. So we just
441 check that everything is as expected, and we are done. */
442 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
443 bb
= gimple_bb (stmt
);
444 if (gimple_code (stmt
) == GIMPLE_PHI
445 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
446 && gimple_code (def_stmt
) != GIMPLE_PHI
447 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
448 && bb
->loop_father
== def_bb
->loop_father
)
450 if (dump_enabled_p ())
451 dump_printf_loc (MSG_NOTE
, vect_location
,
452 "reduc-stmt defining reduc-phi in the same nest.\n");
453 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
454 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
455 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
456 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
457 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
461 /* case 3a: outer-loop stmt defining an inner-loop stmt:
462 outer-loop-header-bb:
468 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_NOTE
, vect_location
,
472 "outer-loop def-stmt defining inner-loop stmt.\n");
476 case vect_unused_in_scope
:
477 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
478 vect_used_in_scope
: vect_unused_in_scope
;
481 case vect_used_in_outer_by_reduction
:
482 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
483 relevant
= vect_used_by_reduction
;
486 case vect_used_in_outer
:
487 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
488 relevant
= vect_used_in_scope
;
491 case vect_used_in_scope
:
499 /* case 3b: inner-loop stmt defining an outer-loop stmt:
500 outer-loop-header-bb:
504 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
506 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE
, vect_location
,
510 "inner-loop def-stmt defining outer-loop stmt.\n");
514 case vect_unused_in_scope
:
515 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
516 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
517 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
520 case vect_used_by_reduction
:
521 relevant
= vect_used_in_outer_by_reduction
;
524 case vect_used_in_scope
:
525 relevant
= vect_used_in_outer
;
533 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
);
538 /* Function vect_mark_stmts_to_be_vectorized.
540 Not all stmts in the loop need to be vectorized. For example:
549 Stmt 1 and 3 do not need to be vectorized, because loop control and
550 addressing of vectorized data-refs are handled differently.
552 This pass detects such stmts. */
555 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
557 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
558 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
559 unsigned int nbbs
= loop
->num_nodes
;
560 gimple_stmt_iterator si
;
563 stmt_vec_info stmt_vinfo
;
567 enum vect_relevant relevant
, tmp_relevant
;
568 enum vect_def_type def_type
;
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE
, vect_location
,
572 "=== vect_mark_stmts_to_be_vectorized ===\n");
574 auto_vec
<gimple
*, 64> worklist
;
576 /* 1. Init worklist. */
577 for (i
= 0; i
< nbbs
; i
++)
580 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
583 if (dump_enabled_p ())
585 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
586 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
589 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
590 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
592 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
594 stmt
= gsi_stmt (si
);
595 if (dump_enabled_p ())
597 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
598 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
601 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
602 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
606 /* 2. Process_worklist */
607 while (worklist
.length () > 0)
612 stmt
= worklist
.pop ();
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
616 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
619 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
620 (DEF_STMT) as relevant/irrelevant and live/dead according to the
621 liveness and relevance properties of STMT. */
622 stmt_vinfo
= vinfo_for_stmt (stmt
);
623 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
624 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
626 /* Generally, the liveness and relevance properties of STMT are
627 propagated as is to the DEF_STMTs of its USEs:
628 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
629 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
631 One exception is when STMT has been identified as defining a reduction
632 variable; in this case we set the liveness/relevance as follows:
634 relevant = vect_used_by_reduction
635 This is because we distinguish between two kinds of relevant stmts -
636 those that are used by a reduction computation, and those that are
637 (also) used by a regular computation. This allows us later on to
638 identify stmts that are used solely by a reduction, and therefore the
639 order of the results that they produce does not have to be kept. */
641 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
642 tmp_relevant
= relevant
;
645 case vect_reduction_def
:
646 switch (tmp_relevant
)
648 case vect_unused_in_scope
:
649 relevant
= vect_used_by_reduction
;
652 case vect_used_by_reduction
:
653 if (gimple_code (stmt
) == GIMPLE_PHI
)
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
660 "unsupported use of reduction.\n");
667 case vect_nested_cycle
:
668 if (tmp_relevant
!= vect_unused_in_scope
669 && tmp_relevant
!= vect_used_in_outer_by_reduction
670 && tmp_relevant
!= vect_used_in_outer
)
672 if (dump_enabled_p ())
673 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
674 "unsupported use of nested cycle.\n");
682 case vect_double_reduction_def
:
683 if (tmp_relevant
!= vect_unused_in_scope
684 && tmp_relevant
!= vect_used_by_reduction
)
686 if (dump_enabled_p ())
687 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
688 "unsupported use of double reduction.\n");
700 if (is_pattern_stmt_p (stmt_vinfo
))
702 /* Pattern statements are not inserted into the code, so
703 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
704 have to scan the RHS or function arguments instead. */
705 if (is_gimple_assign (stmt
))
707 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
708 tree op
= gimple_assign_rhs1 (stmt
);
711 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
713 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
714 live_p
, relevant
, &worklist
, false)
715 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
716 live_p
, relevant
, &worklist
, false))
720 for (; i
< gimple_num_ops (stmt
); i
++)
722 op
= gimple_op (stmt
, i
);
723 if (TREE_CODE (op
) == SSA_NAME
724 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
729 else if (is_gimple_call (stmt
))
731 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
733 tree arg
= gimple_call_arg (stmt
, i
);
734 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
741 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
743 tree op
= USE_FROM_PTR (use_p
);
744 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
749 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
752 tree decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
754 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
758 } /* while worklist */
764 /* Function vect_model_simple_cost.
766 Models cost for simple operations, i.e. those that only emit ncopies of a
767 single op. Right now, this does not account for multiple insns that could
768 be generated for the single vector op. We will handle that shortly. */
771 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
772 enum vect_def_type
*dt
,
773 stmt_vector_for_cost
*prologue_cost_vec
,
774 stmt_vector_for_cost
*body_cost_vec
)
777 int inside_cost
= 0, prologue_cost
= 0;
779 /* The SLP costs were already calculated during SLP tree build. */
780 if (PURE_SLP_STMT (stmt_info
))
783 /* FORNOW: Assuming maximum 2 args per stmts. */
784 for (i
= 0; i
< 2; i
++)
785 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
786 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
787 stmt_info
, 0, vect_prologue
);
789 /* Pass the inside-of-loop statements to the target-specific cost model. */
790 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
791 stmt_info
, 0, vect_body
);
793 if (dump_enabled_p ())
794 dump_printf_loc (MSG_NOTE
, vect_location
,
795 "vect_model_simple_cost: inside_cost = %d, "
796 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
800 /* Model cost for type demotion and promotion operations. PWR is normally
801 zero for single-step promotions and demotions. It will be one if
802 two-step promotion/demotion is required, and so on. Each additional
803 step doubles the number of instructions required. */
806 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
807 enum vect_def_type
*dt
, int pwr
)
810 int inside_cost
= 0, prologue_cost
= 0;
811 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
812 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
813 void *target_cost_data
;
815 /* The SLP costs were already calculated during SLP tree build. */
816 if (PURE_SLP_STMT (stmt_info
))
820 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
822 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
824 for (i
= 0; i
< pwr
+ 1; i
++)
826 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
828 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
829 vec_promote_demote
, stmt_info
, 0,
833 /* FORNOW: Assuming maximum 2 args per stmts. */
834 for (i
= 0; i
< 2; i
++)
835 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
836 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
837 stmt_info
, 0, vect_prologue
);
839 if (dump_enabled_p ())
840 dump_printf_loc (MSG_NOTE
, vect_location
,
841 "vect_model_promotion_demotion_cost: inside_cost = %d, "
842 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
845 /* Function vect_cost_group_size
847 For grouped load or store, return the group_size only if it is the first
848 load or store of a group, else return 1. This ensures that group size is
849 only returned once per group. */
852 vect_cost_group_size (stmt_vec_info stmt_info
)
854 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
856 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
857 return GROUP_SIZE (stmt_info
);
863 /* Function vect_model_store_cost
865 Models cost for stores. In the case of grouped accesses, one access
866 has the overhead of the grouped access attributed to it. */
869 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
870 bool store_lanes_p
, enum vect_def_type dt
,
872 stmt_vector_for_cost
*prologue_cost_vec
,
873 stmt_vector_for_cost
*body_cost_vec
)
876 unsigned int inside_cost
= 0, prologue_cost
= 0;
877 struct data_reference
*first_dr
;
880 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
881 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
882 stmt_info
, 0, vect_prologue
);
884 /* Grouped access? */
885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
889 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
894 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
895 group_size
= vect_cost_group_size (stmt_info
);
898 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
900 /* Not a grouped access. */
904 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
907 /* We assume that the cost of a single store-lanes instruction is
908 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
909 access is instead being provided by a permute-and-store operation,
910 include the cost of the permutes. */
911 if (!store_lanes_p
&& group_size
> 1
912 && !STMT_VINFO_STRIDED_P (stmt_info
))
914 /* Uses a high and low interleave or shuffle operations for each
916 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
917 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
918 stmt_info
, 0, vect_body
);
920 if (dump_enabled_p ())
921 dump_printf_loc (MSG_NOTE
, vect_location
,
922 "vect_model_store_cost: strided group_size = %d .\n",
926 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
927 /* Costs of the stores. */
928 if (STMT_VINFO_STRIDED_P (stmt_info
)
929 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
931 /* N scalar stores plus extracting the elements. */
932 inside_cost
+= record_stmt_cost (body_cost_vec
,
933 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
934 scalar_store
, stmt_info
, 0, vect_body
);
937 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
939 if (STMT_VINFO_STRIDED_P (stmt_info
))
940 inside_cost
+= record_stmt_cost (body_cost_vec
,
941 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
942 vec_to_scalar
, stmt_info
, 0, vect_body
);
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_NOTE
, vect_location
,
946 "vect_model_store_cost: inside_cost = %d, "
947 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
951 /* Calculate cost of DR's memory access. */
953 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
954 unsigned int *inside_cost
,
955 stmt_vector_for_cost
*body_cost_vec
)
957 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
958 gimple
*stmt
= DR_STMT (dr
);
959 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
961 switch (alignment_support_scheme
)
965 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
966 vector_store
, stmt_info
, 0,
969 if (dump_enabled_p ())
970 dump_printf_loc (MSG_NOTE
, vect_location
,
971 "vect_model_store_cost: aligned.\n");
975 case dr_unaligned_supported
:
977 /* Here, we assign an additional cost for the unaligned store. */
978 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
979 unaligned_store
, stmt_info
,
980 DR_MISALIGNMENT (dr
), vect_body
);
981 if (dump_enabled_p ())
982 dump_printf_loc (MSG_NOTE
, vect_location
,
983 "vect_model_store_cost: unaligned supported by "
988 case dr_unaligned_unsupported
:
990 *inside_cost
= VECT_MAX_COST
;
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
994 "vect_model_store_cost: unsupported access.\n");
1004 /* Function vect_model_load_cost
1006 Models cost for loads. In the case of grouped accesses, the last access
1007 has the overhead of the grouped access attributed to it. Since unaligned
1008 accesses are supported for loads, we also account for the costs of the
1009 access scheme chosen. */
1012 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1013 bool load_lanes_p
, slp_tree slp_node
,
1014 stmt_vector_for_cost
*prologue_cost_vec
,
1015 stmt_vector_for_cost
*body_cost_vec
)
1019 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1020 unsigned int inside_cost
= 0, prologue_cost
= 0;
1022 /* Grouped accesses? */
1023 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1024 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1026 group_size
= vect_cost_group_size (stmt_info
);
1027 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1029 /* Not a grouped access. */
1036 /* We assume that the cost of a single load-lanes instruction is
1037 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1038 access is instead being provided by a load-and-permute operation,
1039 include the cost of the permutes. */
1040 if (!load_lanes_p
&& group_size
> 1
1041 && !STMT_VINFO_STRIDED_P (stmt_info
))
1043 /* Uses an even and odd extract operations or shuffle operations
1044 for each needed permute. */
1045 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1046 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1047 stmt_info
, 0, vect_body
);
1049 if (dump_enabled_p ())
1050 dump_printf_loc (MSG_NOTE
, vect_location
,
1051 "vect_model_load_cost: strided group_size = %d .\n",
1055 /* The loads themselves. */
1056 if (STMT_VINFO_STRIDED_P (stmt_info
)
1057 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1059 /* N scalar loads plus gathering them into a vector. */
1060 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1061 inside_cost
+= record_stmt_cost (body_cost_vec
,
1062 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1063 scalar_load
, stmt_info
, 0, vect_body
);
1066 vect_get_load_cost (first_dr
, ncopies
,
1067 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1068 || group_size
> 1 || slp_node
),
1069 &inside_cost
, &prologue_cost
,
1070 prologue_cost_vec
, body_cost_vec
, true);
1071 if (STMT_VINFO_STRIDED_P (stmt_info
))
1072 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1073 stmt_info
, 0, vect_body
);
1075 if (dump_enabled_p ())
1076 dump_printf_loc (MSG_NOTE
, vect_location
,
1077 "vect_model_load_cost: inside_cost = %d, "
1078 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1082 /* Calculate cost of DR's memory access. */
1084 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1085 bool add_realign_cost
, unsigned int *inside_cost
,
1086 unsigned int *prologue_cost
,
1087 stmt_vector_for_cost
*prologue_cost_vec
,
1088 stmt_vector_for_cost
*body_cost_vec
,
1089 bool record_prologue_costs
)
1091 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1092 gimple
*stmt
= DR_STMT (dr
);
1093 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1095 switch (alignment_support_scheme
)
1099 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1100 stmt_info
, 0, vect_body
);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE
, vect_location
,
1104 "vect_model_load_cost: aligned.\n");
1108 case dr_unaligned_supported
:
1110 /* Here, we assign an additional cost for the unaligned load. */
1111 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1112 unaligned_load
, stmt_info
,
1113 DR_MISALIGNMENT (dr
), vect_body
);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE
, vect_location
,
1117 "vect_model_load_cost: unaligned supported by "
1122 case dr_explicit_realign
:
1124 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1125 vector_load
, stmt_info
, 0, vect_body
);
1126 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1127 vec_perm
, stmt_info
, 0, vect_body
);
1129 /* FIXME: If the misalignment remains fixed across the iterations of
1130 the containing loop, the following cost should be added to the
1132 if (targetm
.vectorize
.builtin_mask_for_load
)
1133 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1134 stmt_info
, 0, vect_body
);
1136 if (dump_enabled_p ())
1137 dump_printf_loc (MSG_NOTE
, vect_location
,
1138 "vect_model_load_cost: explicit realign\n");
1142 case dr_explicit_realign_optimized
:
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE
, vect_location
,
1146 "vect_model_load_cost: unaligned software "
1149 /* Unaligned software pipeline has a load of an address, an initial
1150 load, and possibly a mask operation to "prime" the loop. However,
1151 if this is an access in a group of loads, which provide grouped
1152 access, then the above cost should only be considered for one
1153 access in the group. Inside the loop, there is a load op
1154 and a realignment op. */
1156 if (add_realign_cost
&& record_prologue_costs
)
1158 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1159 vector_stmt
, stmt_info
,
1161 if (targetm
.vectorize
.builtin_mask_for_load
)
1162 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1163 vector_stmt
, stmt_info
,
1167 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1168 stmt_info
, 0, vect_body
);
1169 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1170 stmt_info
, 0, vect_body
);
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE
, vect_location
,
1174 "vect_model_load_cost: explicit realign optimized"
1180 case dr_unaligned_unsupported
:
1182 *inside_cost
= VECT_MAX_COST
;
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1186 "vect_model_load_cost: unsupported access.\n");
1195 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1196 the loop preheader for the vectorized stmt STMT. */
1199 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1202 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1205 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1206 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1210 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1214 if (nested_in_vect_loop_p (loop
, stmt
))
1217 pe
= loop_preheader_edge (loop
);
1218 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1219 gcc_assert (!new_bb
);
1223 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1225 gimple_stmt_iterator gsi_bb_start
;
1227 gcc_assert (bb_vinfo
);
1228 bb
= BB_VINFO_BB (bb_vinfo
);
1229 gsi_bb_start
= gsi_after_labels (bb
);
1230 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1234 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_NOTE
, vect_location
,
1237 "created new init_stmt: ");
1238 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1242 /* Function vect_init_vector.
1244 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1245 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1246 vector type a vector with all elements equal to VAL is created first.
1247 Place the initialization at BSI if it is not NULL. Otherwise, place the
1248 initialization at the loop preheader.
1249 Return the DEF of INIT_STMT.
1250 It will be used in the vectorization of STMT. */
1253 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1258 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1259 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1261 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1262 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1264 /* Scalar boolean value should be transformed into
1265 all zeros or all ones value before building a vector. */
1266 if (VECTOR_BOOLEAN_TYPE_P (type
))
1268 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1269 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1271 if (CONSTANT_CLASS_P (val
))
1272 val
= integer_zerop (val
) ? false_val
: true_val
;
1275 new_temp
= make_ssa_name (TREE_TYPE (type
));
1276 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1277 val
, true_val
, false_val
);
1278 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1282 else if (CONSTANT_CLASS_P (val
))
1283 val
= fold_convert (TREE_TYPE (type
), val
);
1286 new_temp
= make_ssa_name (TREE_TYPE (type
));
1287 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1288 init_stmt
= gimple_build_assign (new_temp
,
1289 fold_build1 (VIEW_CONVERT_EXPR
,
1293 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1294 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1298 val
= build_vector_from_val (type
, val
);
1301 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1302 init_stmt
= gimple_build_assign (new_temp
, val
);
1303 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1308 /* Function vect_get_vec_def_for_operand.
1310 OP is an operand in STMT. This function returns a (vector) def that will be
1311 used in the vectorized stmt for STMT.
1313 In the case that OP is an SSA_NAME which is defined in the loop, then
1314 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1316 In case OP is an invariant or constant, a new stmt that creates a vector def
1317 needs to be introduced. VECTYPE may be used to specify a required type for
1318 vector invariant. */
1321 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1326 stmt_vec_info def_stmt_info
= NULL
;
1327 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1328 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1329 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1330 enum vect_def_type dt
;
1334 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE
, vect_location
,
1337 "vect_get_vec_def_for_operand: ");
1338 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1339 dump_printf (MSG_NOTE
, "\n");
1342 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1343 gcc_assert (is_simple_use
);
1344 if (dump_enabled_p ())
1346 int loc_printed
= 0;
1350 dump_printf (MSG_NOTE
, " def_stmt = ");
1352 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1353 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1359 /* operand is a constant or a loop invariant. */
1360 case vect_constant_def
:
1361 case vect_external_def
:
1364 vector_type
= vectype
;
1365 else if (TREE_CODE (TREE_TYPE (op
)) == BOOLEAN_TYPE
1366 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1367 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1369 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1371 gcc_assert (vector_type
);
1372 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1375 /* operand is defined inside the loop. */
1376 case vect_internal_def
:
1378 /* Get the def from the vectorized stmt. */
1379 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1381 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1382 /* Get vectorized pattern statement. */
1384 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1385 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1386 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1387 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1388 gcc_assert (vec_stmt
);
1389 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1390 vec_oprnd
= PHI_RESULT (vec_stmt
);
1391 else if (is_gimple_call (vec_stmt
))
1392 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1394 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1398 /* operand is defined by a loop header phi - reduction */
1399 case vect_reduction_def
:
1400 case vect_double_reduction_def
:
1401 case vect_nested_cycle
:
1402 /* Code should use get_initial_def_for_reduction. */
1405 /* operand is defined by loop-header phi - induction. */
1406 case vect_induction_def
:
1408 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1410 /* Get the def from the vectorized stmt. */
1411 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1412 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1413 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1414 vec_oprnd
= PHI_RESULT (vec_stmt
);
1416 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1426 /* Function vect_get_vec_def_for_stmt_copy
1428 Return a vector-def for an operand. This function is used when the
1429 vectorized stmt to be created (by the caller to this function) is a "copy"
1430 created in case the vectorized result cannot fit in one vector, and several
1431 copies of the vector-stmt are required. In this case the vector-def is
1432 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1433 of the stmt that defines VEC_OPRND.
1434 DT is the type of the vector def VEC_OPRND.
1437 In case the vectorization factor (VF) is bigger than the number
1438 of elements that can fit in a vectype (nunits), we have to generate
1439 more than one vector stmt to vectorize the scalar stmt. This situation
1440 arises when there are multiple data-types operated upon in the loop; the
1441 smallest data-type determines the VF, and as a result, when vectorizing
1442 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1443 vector stmt (each computing a vector of 'nunits' results, and together
1444 computing 'VF' results in each iteration). This function is called when
1445 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1446 which VF=16 and nunits=4, so the number of copies required is 4):
1448 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1450 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1451 VS1.1: vx.1 = memref1 VS1.2
1452 VS1.2: vx.2 = memref2 VS1.3
1453 VS1.3: vx.3 = memref3
1455 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1456 VSnew.1: vz1 = vx.1 + ... VSnew.2
1457 VSnew.2: vz2 = vx.2 + ... VSnew.3
1458 VSnew.3: vz3 = vx.3 + ...
1460 The vectorization of S1 is explained in vectorizable_load.
1461 The vectorization of S2:
1462 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1463 the function 'vect_get_vec_def_for_operand' is called to
1464 get the relevant vector-def for each operand of S2. For operand x it
1465 returns the vector-def 'vx.0'.
1467 To create the remaining copies of the vector-stmt (VSnew.j), this
1468 function is called to get the relevant vector-def for each operand. It is
1469 obtained from the respective VS1.j stmt, which is recorded in the
1470 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1472 For example, to obtain the vector-def 'vx.1' in order to create the
1473 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1474 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1475 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1476 and return its def ('vx.1').
1477 Overall, to create the above sequence this function will be called 3 times:
1478 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1479 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1480 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1483 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1485 gimple
*vec_stmt_for_operand
;
1486 stmt_vec_info def_stmt_info
;
1488 /* Do nothing; can reuse same def. */
1489 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1492 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1493 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1494 gcc_assert (def_stmt_info
);
1495 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1496 gcc_assert (vec_stmt_for_operand
);
1497 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1498 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1500 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1505 /* Get vectorized definitions for the operands to create a copy of an original
1506 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1509 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1510 vec
<tree
> *vec_oprnds0
,
1511 vec
<tree
> *vec_oprnds1
)
1513 tree vec_oprnd
= vec_oprnds0
->pop ();
1515 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1516 vec_oprnds0
->quick_push (vec_oprnd
);
1518 if (vec_oprnds1
&& vec_oprnds1
->length ())
1520 vec_oprnd
= vec_oprnds1
->pop ();
1521 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1522 vec_oprnds1
->quick_push (vec_oprnd
);
1527 /* Get vectorized definitions for OP0 and OP1.
1528 REDUC_INDEX is the index of reduction operand in case of reduction,
1529 and -1 otherwise. */
1532 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1533 vec
<tree
> *vec_oprnds0
,
1534 vec
<tree
> *vec_oprnds1
,
1535 slp_tree slp_node
, int reduc_index
)
1539 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1540 auto_vec
<tree
> ops (nops
);
1541 auto_vec
<vec
<tree
> > vec_defs (nops
);
1543 ops
.quick_push (op0
);
1545 ops
.quick_push (op1
);
1547 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1549 *vec_oprnds0
= vec_defs
[0];
1551 *vec_oprnds1
= vec_defs
[1];
1557 vec_oprnds0
->create (1);
1558 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1559 vec_oprnds0
->quick_push (vec_oprnd
);
1563 vec_oprnds1
->create (1);
1564 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1565 vec_oprnds1
->quick_push (vec_oprnd
);
1571 /* Function vect_finish_stmt_generation.
1573 Insert a new stmt. */
1576 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1577 gimple_stmt_iterator
*gsi
)
1579 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1580 vec_info
*vinfo
= stmt_info
->vinfo
;
1582 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1584 if (!gsi_end_p (*gsi
)
1585 && gimple_has_mem_ops (vec_stmt
))
1587 gimple
*at_stmt
= gsi_stmt (*gsi
);
1588 tree vuse
= gimple_vuse (at_stmt
);
1589 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1591 tree vdef
= gimple_vdef (at_stmt
);
1592 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1593 /* If we have an SSA vuse and insert a store, update virtual
1594 SSA form to avoid triggering the renamer. Do so only
1595 if we can easily see all uses - which is what almost always
1596 happens with the way vectorized stmts are inserted. */
1597 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1598 && ((is_gimple_assign (vec_stmt
)
1599 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1600 || (is_gimple_call (vec_stmt
)
1601 && !(gimple_call_flags (vec_stmt
)
1602 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1604 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1605 gimple_set_vdef (vec_stmt
, new_vdef
);
1606 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1610 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1612 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1614 if (dump_enabled_p ())
1616 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1617 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1620 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1622 /* While EH edges will generally prevent vectorization, stmt might
1623 e.g. be in a must-not-throw region. Ensure newly created stmts
1624 that could throw are part of the same region. */
1625 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1626 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1627 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1630 /* We want to vectorize a call to combined function CFN with function
1631 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1632 as the types of all inputs. Check whether this is possible using
1633 an internal function, returning its code if so or IFN_LAST if not. */
1636 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1637 tree vectype_out
, tree vectype_in
)
1640 if (internal_fn_p (cfn
))
1641 ifn
= as_internal_fn (cfn
);
1643 ifn
= associated_internal_fn (fndecl
);
1644 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1646 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1647 if (info
.vectorizable
)
1649 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1650 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1651 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1652 OPTIMIZE_FOR_SPEED
))
1660 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1661 gimple_stmt_iterator
*);
1664 /* Function vectorizable_mask_load_store.
1666 Check if STMT performs a conditional load or store that can be vectorized.
1667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1668 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1669 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1672 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1673 gimple
**vec_stmt
, slp_tree slp_node
)
1675 tree vec_dest
= NULL
;
1676 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1677 stmt_vec_info prev_stmt_info
;
1678 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1679 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1680 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1681 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1682 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1683 tree rhs_vectype
= NULL_TREE
;
1688 tree dataref_ptr
= NULL_TREE
;
1690 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1694 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1695 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1696 int gather_scale
= 1;
1697 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1701 enum vect_def_type dt
;
1703 if (slp_node
!= NULL
)
1706 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1707 gcc_assert (ncopies
>= 1);
1709 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1710 mask
= gimple_call_arg (stmt
, 2);
1712 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
1715 /* FORNOW. This restriction should be relaxed. */
1716 if (nested_in_vect_loop
&& ncopies
> 1)
1718 if (dump_enabled_p ())
1719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1720 "multiple types in nested loop.");
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1727 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
1731 if (!STMT_VINFO_DATA_REF (stmt_info
))
1734 elem_type
= TREE_TYPE (vectype
);
1736 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1739 if (STMT_VINFO_STRIDED_P (stmt_info
))
1742 if (TREE_CODE (mask
) != SSA_NAME
)
1745 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
1749 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
1751 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
1752 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
1757 tree rhs
= gimple_call_arg (stmt
, 3);
1758 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
1762 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1765 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
1766 &gather_off
, &gather_scale
);
1767 gcc_assert (gather_decl
);
1768 if (!vect_is_simple_use (gather_off
, loop_vinfo
, &def_stmt
, &gather_dt
,
1769 &gather_off_vectype
))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1773 "gather index use not simple.");
1777 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1779 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1780 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1784 "masked gather with integer mask not supported.");
1788 else if (tree_int_cst_compare (nested_in_vect_loop
1789 ? STMT_VINFO_DR_STEP (stmt_info
)
1790 : DR_STEP (dr
), size_zero_node
) <= 0)
1792 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1793 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
1794 TYPE_MODE (mask_vectype
),
1797 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
1800 if (!vec_stmt
) /* transformation not required. */
1802 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1804 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1807 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1813 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1815 tree vec_oprnd0
= NULL_TREE
, op
;
1816 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1817 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1818 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1819 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1820 tree mask_perm_mask
= NULL_TREE
;
1821 edge pe
= loop_preheader_edge (loop
);
1824 enum { NARROW
, NONE
, WIDEN
} modifier
;
1825 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1827 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1828 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1829 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1830 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1831 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1832 scaletype
= TREE_VALUE (arglist
);
1833 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1834 && types_compatible_p (srctype
, masktype
));
1836 if (nunits
== gather_off_nunits
)
1838 else if (nunits
== gather_off_nunits
/ 2)
1840 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1843 for (i
= 0; i
< gather_off_nunits
; ++i
)
1844 sel
[i
] = i
| nunits
;
1846 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1848 else if (nunits
== gather_off_nunits
* 2)
1850 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1853 for (i
= 0; i
< nunits
; ++i
)
1854 sel
[i
] = i
< gather_off_nunits
1855 ? i
: i
+ nunits
- gather_off_nunits
;
1857 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1859 for (i
= 0; i
< nunits
; ++i
)
1860 sel
[i
] = i
| gather_off_nunits
;
1861 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1866 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1868 ptr
= fold_convert (ptrtype
, gather_base
);
1869 if (!is_gimple_min_invariant (ptr
))
1871 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1872 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1873 gcc_assert (!new_bb
);
1876 scale
= build_int_cst (scaletype
, gather_scale
);
1878 prev_stmt_info
= NULL
;
1879 for (j
= 0; j
< ncopies
; ++j
)
1881 if (modifier
== WIDEN
&& (j
& 1))
1882 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1883 perm_mask
, stmt
, gsi
);
1886 = vect_get_vec_def_for_operand (gather_off
, stmt
);
1889 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1891 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1893 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1894 == TYPE_VECTOR_SUBPARTS (idxtype
));
1895 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
1896 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1898 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1899 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1903 if (mask_perm_mask
&& (j
& 1))
1904 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1905 mask_perm_mask
, stmt
, gsi
);
1909 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
1912 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
1913 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1917 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1919 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1920 == TYPE_VECTOR_SUBPARTS (masktype
));
1921 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
1922 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1924 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
1925 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1931 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
1934 if (!useless_type_conversion_p (vectype
, rettype
))
1936 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1937 == TYPE_VECTOR_SUBPARTS (rettype
));
1938 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
1939 gimple_call_set_lhs (new_stmt
, op
);
1940 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1941 var
= make_ssa_name (vec_dest
);
1942 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
1943 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1947 var
= make_ssa_name (vec_dest
, new_stmt
);
1948 gimple_call_set_lhs (new_stmt
, var
);
1951 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1953 if (modifier
== NARROW
)
1960 var
= permute_vec_elements (prev_res
, var
,
1961 perm_mask
, stmt
, gsi
);
1962 new_stmt
= SSA_NAME_DEF_STMT (var
);
1965 if (prev_stmt_info
== NULL
)
1966 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1968 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1969 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1972 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
1974 if (STMT_VINFO_RELATED_STMT (stmt_info
))
1976 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
1977 stmt_info
= vinfo_for_stmt (stmt
);
1979 tree lhs
= gimple_call_lhs (stmt
);
1980 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
1981 set_vinfo_for_stmt (new_stmt
, stmt_info
);
1982 set_vinfo_for_stmt (stmt
, NULL
);
1983 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
1984 gsi_replace (gsi
, new_stmt
, true);
1989 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
1990 prev_stmt_info
= NULL
;
1991 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
1992 for (i
= 0; i
< ncopies
; i
++)
1994 unsigned align
, misalign
;
1998 tree rhs
= gimple_call_arg (stmt
, 3);
1999 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2000 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2001 /* We should have catched mismatched types earlier. */
2002 gcc_assert (useless_type_conversion_p (vectype
,
2003 TREE_TYPE (vec_rhs
)));
2004 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2005 NULL_TREE
, &dummy
, gsi
,
2006 &ptr_incr
, false, &inv_p
);
2007 gcc_assert (!inv_p
);
2011 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2012 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2013 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2014 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2015 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2016 TYPE_SIZE_UNIT (vectype
));
2019 align
= TYPE_ALIGN_UNIT (vectype
);
2020 if (aligned_access_p (dr
))
2022 else if (DR_MISALIGNMENT (dr
) == -1)
2024 align
= TYPE_ALIGN_UNIT (elem_type
);
2028 misalign
= DR_MISALIGNMENT (dr
);
2029 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2031 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2032 misalign
? misalign
& -misalign
: align
);
2034 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2035 ptr
, vec_mask
, vec_rhs
);
2036 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2038 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2040 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2041 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2046 tree vec_mask
= NULL_TREE
;
2047 prev_stmt_info
= NULL
;
2048 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2049 for (i
= 0; i
< ncopies
; i
++)
2051 unsigned align
, misalign
;
2055 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2056 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2057 NULL_TREE
, &dummy
, gsi
,
2058 &ptr_incr
, false, &inv_p
);
2059 gcc_assert (!inv_p
);
2063 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2064 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2065 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2066 TYPE_SIZE_UNIT (vectype
));
2069 align
= TYPE_ALIGN_UNIT (vectype
);
2070 if (aligned_access_p (dr
))
2072 else if (DR_MISALIGNMENT (dr
) == -1)
2074 align
= TYPE_ALIGN_UNIT (elem_type
);
2078 misalign
= DR_MISALIGNMENT (dr
);
2079 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2081 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2082 misalign
? misalign
& -misalign
: align
);
2084 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2086 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2087 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2089 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2091 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2092 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2098 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2100 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2102 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2103 stmt_info
= vinfo_for_stmt (stmt
);
2105 tree lhs
= gimple_call_lhs (stmt
);
2106 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2107 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2108 set_vinfo_for_stmt (stmt
, NULL
);
2109 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2110 gsi_replace (gsi
, new_stmt
, true);
2116 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2117 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2118 in a single step. On success, store the binary pack code in
2122 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2123 tree_code
*convert_code
)
2125 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2126 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2130 int multi_step_cvt
= 0;
2131 auto_vec
<tree
, 8> interm_types
;
2132 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2133 &code
, &multi_step_cvt
,
2138 *convert_code
= code
;
2142 /* Function vectorizable_call.
2144 Check if GS performs a function call that can be vectorized.
2145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2150 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2157 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2158 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2159 tree vectype_out
, vectype_in
;
2162 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2163 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2164 vec_info
*vinfo
= stmt_info
->vinfo
;
2165 tree fndecl
, new_temp
, rhs_type
;
2167 enum vect_def_type dt
[3]
2168 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2169 gimple
*new_stmt
= NULL
;
2171 vec
<tree
> vargs
= vNULL
;
2172 enum { NARROW
, NONE
, WIDEN
} modifier
;
2176 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2179 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2183 /* Is GS a vectorizable call? */
2184 stmt
= dyn_cast
<gcall
*> (gs
);
2188 if (gimple_call_internal_p (stmt
)
2189 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2190 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2191 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2194 if (gimple_call_lhs (stmt
) == NULL_TREE
2195 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2198 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2200 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2202 /* Process function arguments. */
2203 rhs_type
= NULL_TREE
;
2204 vectype_in
= NULL_TREE
;
2205 nargs
= gimple_call_num_args (stmt
);
2207 /* Bail out if the function has more than three arguments, we do not have
2208 interesting builtin functions to vectorize with more than two arguments
2209 except for fma. No arguments is also not good. */
2210 if (nargs
== 0 || nargs
> 3)
2213 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2214 if (gimple_call_internal_p (stmt
)
2215 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2218 rhs_type
= unsigned_type_node
;
2221 for (i
= 0; i
< nargs
; i
++)
2225 op
= gimple_call_arg (stmt
, i
);
2227 /* We can only handle calls with arguments of the same type. */
2229 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2231 if (dump_enabled_p ())
2232 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2233 "argument types differ.\n");
2237 rhs_type
= TREE_TYPE (op
);
2239 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2243 "use not simple.\n");
2248 vectype_in
= opvectype
;
2250 && opvectype
!= vectype_in
)
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2254 "argument vector types differ.\n");
2258 /* If all arguments are external or constant defs use a vector type with
2259 the same size as the output vector type. */
2261 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2263 gcc_assert (vectype_in
);
2266 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2269 "no vectype for scalar type ");
2270 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2271 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2278 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2279 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2280 if (nunits_in
== nunits_out
/ 2)
2282 else if (nunits_out
== nunits_in
)
2284 else if (nunits_out
== nunits_in
/ 2)
2289 /* We only handle functions that do not read or clobber memory. */
2290 if (gimple_vuse (stmt
))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2294 "function reads from or writes to memory.\n");
2298 /* For now, we only vectorize functions if a target specific builtin
2299 is available. TODO -- in some cases, it might be profitable to
2300 insert the calls for pieces of the vector, in order to be able
2301 to vectorize other operations in the loop. */
2303 internal_fn ifn
= IFN_LAST
;
2304 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2305 tree callee
= gimple_call_fndecl (stmt
);
2307 /* First try using an internal function. */
2308 tree_code convert_code
= ERROR_MARK
;
2310 && (modifier
== NONE
2311 || (modifier
== NARROW
2312 && simple_integer_narrowing (vectype_out
, vectype_in
,
2314 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2317 /* If that fails, try asking for a target-specific built-in function. */
2318 if (ifn
== IFN_LAST
)
2320 if (cfn
!= CFN_LAST
)
2321 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2322 (cfn
, vectype_out
, vectype_in
);
2324 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2325 (callee
, vectype_out
, vectype_in
);
2328 if (ifn
== IFN_LAST
&& !fndecl
)
2330 if (cfn
== CFN_GOMP_SIMD_LANE
2333 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2334 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2335 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2336 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2338 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2339 { 0, 1, 2, ... vf - 1 } vector. */
2340 gcc_assert (nargs
== 0);
2344 if (dump_enabled_p ())
2345 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2346 "function is not vectorizable.\n");
2351 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2353 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2354 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2356 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2358 /* Sanity check: make sure that at least one copy of the vectorized stmt
2359 needs to be generated. */
2360 gcc_assert (ncopies
>= 1);
2362 if (!vec_stmt
) /* transformation not required. */
2364 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2368 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2369 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2370 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2371 vec_promote_demote
, stmt_info
, 0, vect_body
);
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2382 scalar_dest
= gimple_call_lhs (stmt
);
2383 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2385 prev_stmt_info
= NULL
;
2386 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2388 tree prev_res
= NULL_TREE
;
2389 for (j
= 0; j
< ncopies
; ++j
)
2391 /* Build argument list for the vectorized call. */
2393 vargs
.create (nargs
);
2399 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2400 vec
<tree
> vec_oprnds0
;
2402 for (i
= 0; i
< nargs
; i
++)
2403 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2404 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2405 vec_oprnds0
= vec_defs
[0];
2407 /* Arguments are ready. Create the new vector stmt. */
2408 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2411 for (k
= 0; k
< nargs
; k
++)
2413 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2414 vargs
[k
] = vec_oprndsk
[i
];
2416 if (modifier
== NARROW
)
2418 tree half_res
= make_ssa_name (vectype_in
);
2419 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2420 gimple_call_set_lhs (new_stmt
, half_res
);
2421 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2424 prev_res
= half_res
;
2427 new_temp
= make_ssa_name (vec_dest
);
2428 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2429 prev_res
, half_res
);
2433 if (ifn
!= IFN_LAST
)
2434 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2436 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2437 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2438 gimple_call_set_lhs (new_stmt
, new_temp
);
2440 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2441 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2444 for (i
= 0; i
< nargs
; i
++)
2446 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2447 vec_oprndsi
.release ();
2452 for (i
= 0; i
< nargs
; i
++)
2454 op
= gimple_call_arg (stmt
, i
);
2457 = vect_get_vec_def_for_operand (op
, stmt
);
2460 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2462 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2465 vargs
.quick_push (vec_oprnd0
);
2468 if (gimple_call_internal_p (stmt
)
2469 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2471 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2473 for (k
= 0; k
< nunits_out
; ++k
)
2474 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2475 tree cst
= build_vector (vectype_out
, v
);
2477 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2478 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2479 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2480 new_temp
= make_ssa_name (vec_dest
);
2481 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2483 else if (modifier
== NARROW
)
2485 tree half_res
= make_ssa_name (vectype_in
);
2486 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2487 gimple_call_set_lhs (new_stmt
, half_res
);
2488 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2491 prev_res
= half_res
;
2494 new_temp
= make_ssa_name (vec_dest
);
2495 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2496 prev_res
, half_res
);
2500 if (ifn
!= IFN_LAST
)
2501 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2503 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2504 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2505 gimple_call_set_lhs (new_stmt
, new_temp
);
2507 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2509 if (j
== (modifier
== NARROW
? 1 : 0))
2510 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2512 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2514 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2517 else if (modifier
== NARROW
)
2519 for (j
= 0; j
< ncopies
; ++j
)
2521 /* Build argument list for the vectorized call. */
2523 vargs
.create (nargs
* 2);
2529 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2530 vec
<tree
> vec_oprnds0
;
2532 for (i
= 0; i
< nargs
; i
++)
2533 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2534 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2535 vec_oprnds0
= vec_defs
[0];
2537 /* Arguments are ready. Create the new vector stmt. */
2538 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2542 for (k
= 0; k
< nargs
; k
++)
2544 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2545 vargs
.quick_push (vec_oprndsk
[i
]);
2546 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2548 if (ifn
!= IFN_LAST
)
2549 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2551 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2552 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2553 gimple_call_set_lhs (new_stmt
, new_temp
);
2554 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2555 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2558 for (i
= 0; i
< nargs
; i
++)
2560 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2561 vec_oprndsi
.release ();
2566 for (i
= 0; i
< nargs
; i
++)
2568 op
= gimple_call_arg (stmt
, i
);
2572 = vect_get_vec_def_for_operand (op
, stmt
);
2574 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2578 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2580 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2582 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2585 vargs
.quick_push (vec_oprnd0
);
2586 vargs
.quick_push (vec_oprnd1
);
2589 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2590 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2591 gimple_call_set_lhs (new_stmt
, new_temp
);
2592 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2595 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2597 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2599 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2602 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2605 /* No current target implements this case. */
2610 /* The call in STMT might prevent it from being removed in dce.
2611 We however cannot remove it here, due to the way the ssa name
2612 it defines is mapped to the new definition. So just replace
2613 rhs of the statement with something harmless. */
2618 type
= TREE_TYPE (scalar_dest
);
2619 if (is_pattern_stmt_p (stmt_info
))
2620 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2622 lhs
= gimple_call_lhs (stmt
);
2624 if (gimple_call_internal_p (stmt
)
2625 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2627 /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
2628 with vf - 1 rather than 0, that is the last iteration of the
2630 imm_use_iterator iter
;
2631 use_operand_p use_p
;
2633 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2635 basic_block use_bb
= gimple_bb (use_stmt
);
2637 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), use_bb
))
2639 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2640 SET_USE (use_p
, build_int_cst (TREE_TYPE (lhs
),
2641 ncopies
* nunits_out
- 1));
2642 update_stmt (use_stmt
);
2647 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2648 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2649 set_vinfo_for_stmt (stmt
, NULL
);
2650 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2651 gsi_replace (gsi
, new_stmt
, false);
2657 struct simd_call_arg_info
2661 enum vect_def_type dt
;
2662 HOST_WIDE_INT linear_step
;
2664 bool simd_lane_linear
;
2667 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2668 is linear within simd lane (but not within whole loop), note it in
2672 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2673 struct simd_call_arg_info
*arginfo
)
2675 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2677 if (!is_gimple_assign (def_stmt
)
2678 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2679 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2682 tree base
= gimple_assign_rhs1 (def_stmt
);
2683 HOST_WIDE_INT linear_step
= 0;
2684 tree v
= gimple_assign_rhs2 (def_stmt
);
2685 while (TREE_CODE (v
) == SSA_NAME
)
2688 def_stmt
= SSA_NAME_DEF_STMT (v
);
2689 if (is_gimple_assign (def_stmt
))
2690 switch (gimple_assign_rhs_code (def_stmt
))
2693 t
= gimple_assign_rhs2 (def_stmt
);
2694 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2696 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2697 v
= gimple_assign_rhs1 (def_stmt
);
2700 t
= gimple_assign_rhs2 (def_stmt
);
2701 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2703 linear_step
= tree_to_shwi (t
);
2704 v
= gimple_assign_rhs1 (def_stmt
);
2707 t
= gimple_assign_rhs1 (def_stmt
);
2708 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2709 || (TYPE_PRECISION (TREE_TYPE (v
))
2710 < TYPE_PRECISION (TREE_TYPE (t
))))
2719 else if (is_gimple_call (def_stmt
)
2720 && gimple_call_internal_p (def_stmt
)
2721 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
2723 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
2724 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
2729 arginfo
->linear_step
= linear_step
;
2731 arginfo
->simd_lane_linear
= true;
2737 /* Function vectorizable_simd_clone_call.
2739 Check if STMT performs a function call that can be vectorized
2740 by calling a simd clone of the function.
2741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2746 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2747 gimple
**vec_stmt
, slp_tree slp_node
)
2752 tree vec_oprnd0
= NULL_TREE
;
2753 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2755 unsigned int nunits
;
2756 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2757 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2758 vec_info
*vinfo
= stmt_info
->vinfo
;
2759 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2760 tree fndecl
, new_temp
;
2762 gimple
*new_stmt
= NULL
;
2764 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2765 vec
<tree
> vargs
= vNULL
;
2767 tree lhs
, rtype
, ratype
;
2768 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2770 /* Is STMT a vectorizable call? */
2771 if (!is_gimple_call (stmt
))
2774 fndecl
= gimple_call_fndecl (stmt
);
2775 if (fndecl
== NULL_TREE
)
2778 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2779 if (node
== NULL
|| node
->simd_clones
== NULL
)
2782 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2785 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2789 if (gimple_call_lhs (stmt
)
2790 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2793 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2795 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2797 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2801 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2804 /* Process function arguments. */
2805 nargs
= gimple_call_num_args (stmt
);
2807 /* Bail out if the function has zero arguments. */
2811 arginfo
.create (nargs
);
2813 for (i
= 0; i
< nargs
; i
++)
2815 simd_call_arg_info thisarginfo
;
2818 thisarginfo
.linear_step
= 0;
2819 thisarginfo
.align
= 0;
2820 thisarginfo
.op
= NULL_TREE
;
2821 thisarginfo
.simd_lane_linear
= false;
2823 op
= gimple_call_arg (stmt
, i
);
2824 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
2825 &thisarginfo
.vectype
)
2826 || thisarginfo
.dt
== vect_uninitialized_def
)
2828 if (dump_enabled_p ())
2829 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2830 "use not simple.\n");
2835 if (thisarginfo
.dt
== vect_constant_def
2836 || thisarginfo
.dt
== vect_external_def
)
2837 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2839 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2841 /* For linear arguments, the analyze phase should have saved
2842 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2843 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2844 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
2846 gcc_assert (vec_stmt
);
2847 thisarginfo
.linear_step
2848 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
2850 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
2851 thisarginfo
.simd_lane_linear
2852 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
2853 == boolean_true_node
);
2854 /* If loop has been peeled for alignment, we need to adjust it. */
2855 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2856 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2857 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
2859 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2860 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
2861 tree opt
= TREE_TYPE (thisarginfo
.op
);
2862 bias
= fold_convert (TREE_TYPE (step
), bias
);
2863 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2865 = fold_build2 (POINTER_TYPE_P (opt
)
2866 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2867 thisarginfo
.op
, bias
);
2871 && thisarginfo
.dt
!= vect_constant_def
2872 && thisarginfo
.dt
!= vect_external_def
2874 && TREE_CODE (op
) == SSA_NAME
2875 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2877 && tree_fits_shwi_p (iv
.step
))
2879 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2880 thisarginfo
.op
= iv
.base
;
2882 else if ((thisarginfo
.dt
== vect_constant_def
2883 || thisarginfo
.dt
== vect_external_def
)
2884 && POINTER_TYPE_P (TREE_TYPE (op
)))
2885 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2886 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2888 if (POINTER_TYPE_P (TREE_TYPE (op
))
2889 && !thisarginfo
.linear_step
2891 && thisarginfo
.dt
!= vect_constant_def
2892 && thisarginfo
.dt
!= vect_external_def
2895 && TREE_CODE (op
) == SSA_NAME
)
2896 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
2898 arginfo
.quick_push (thisarginfo
);
2901 unsigned int badness
= 0;
2902 struct cgraph_node
*bestn
= NULL
;
2903 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2904 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2906 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2907 n
= n
->simdclone
->next_clone
)
2909 unsigned int this_badness
= 0;
2910 if (n
->simdclone
->simdlen
2911 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2912 || n
->simdclone
->nargs
!= nargs
)
2914 if (n
->simdclone
->simdlen
2915 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2916 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2917 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2918 if (n
->simdclone
->inbranch
)
2919 this_badness
+= 2048;
2920 int target_badness
= targetm
.simd_clone
.usable (n
);
2921 if (target_badness
< 0)
2923 this_badness
+= target_badness
* 512;
2924 /* FORNOW: Have to add code to add the mask argument. */
2925 if (n
->simdclone
->inbranch
)
2927 for (i
= 0; i
< nargs
; i
++)
2929 switch (n
->simdclone
->args
[i
].arg_type
)
2931 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2932 if (!useless_type_conversion_p
2933 (n
->simdclone
->args
[i
].orig_type
,
2934 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2936 else if (arginfo
[i
].dt
== vect_constant_def
2937 || arginfo
[i
].dt
== vect_external_def
2938 || arginfo
[i
].linear_step
)
2941 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2942 if (arginfo
[i
].dt
!= vect_constant_def
2943 && arginfo
[i
].dt
!= vect_external_def
)
2946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
2948 if (arginfo
[i
].dt
== vect_constant_def
2949 || arginfo
[i
].dt
== vect_external_def
2950 || (arginfo
[i
].linear_step
2951 != n
->simdclone
->args
[i
].linear_step
))
2954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
2957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
2958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
2959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
2963 case SIMD_CLONE_ARG_TYPE_MASK
:
2966 if (i
== (size_t) -1)
2968 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2973 if (arginfo
[i
].align
)
2974 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2975 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2977 if (i
== (size_t) -1)
2979 if (bestn
== NULL
|| this_badness
< badness
)
2982 badness
= this_badness
;
2992 for (i
= 0; i
< nargs
; i
++)
2993 if ((arginfo
[i
].dt
== vect_constant_def
2994 || arginfo
[i
].dt
== vect_external_def
)
2995 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2998 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3000 if (arginfo
[i
].vectype
== NULL
3001 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3002 > bestn
->simdclone
->simdlen
))
3009 fndecl
= bestn
->decl
;
3010 nunits
= bestn
->simdclone
->simdlen
;
3011 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3013 /* If the function isn't const, only allow it in simd loops where user
3014 has asserted that at least nunits consecutive iterations can be
3015 performed using SIMD instructions. */
3016 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3017 && gimple_vuse (stmt
))
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies
>= 1);
3027 if (!vec_stmt
) /* transformation not required. */
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3030 for (i
= 0; i
< nargs
; i
++)
3031 if ((bestn
->simdclone
->args
[i
].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3033 || (bestn
->simdclone
->args
[i
].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3039 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3040 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3041 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3043 tree sll
= arginfo
[i
].simd_lane_linear
3044 ? boolean_true_node
: boolean_false_node
;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3047 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE
, vect_location
,
3050 "=== vectorizable_simd_clone_call ===\n");
3051 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3062 scalar_dest
= gimple_call_lhs (stmt
);
3063 vec_dest
= NULL_TREE
;
3068 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3069 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3070 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3073 rtype
= TREE_TYPE (ratype
);
3077 prev_stmt_info
= NULL
;
3078 for (j
= 0; j
< ncopies
; ++j
)
3080 /* Build argument list for the vectorized call. */
3082 vargs
.create (nargs
);
3086 for (i
= 0; i
< nargs
; i
++)
3088 unsigned int k
, l
, m
, o
;
3090 op
= gimple_call_arg (stmt
, i
);
3091 switch (bestn
->simdclone
->args
[i
].arg_type
)
3093 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3094 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3095 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3096 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3098 if (TYPE_VECTOR_SUBPARTS (atype
)
3099 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3101 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3102 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3103 / TYPE_VECTOR_SUBPARTS (atype
));
3104 gcc_assert ((k
& (k
- 1)) == 0);
3107 = vect_get_vec_def_for_operand (op
, stmt
);
3110 vec_oprnd0
= arginfo
[i
].op
;
3111 if ((m
& (k
- 1)) == 0)
3113 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3116 arginfo
[i
].op
= vec_oprnd0
;
3118 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3120 bitsize_int ((m
& (k
- 1)) * prec
));
3122 = gimple_build_assign (make_ssa_name (atype
),
3124 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3125 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3129 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3130 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3131 gcc_assert ((k
& (k
- 1)) == 0);
3132 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3134 vec_alloc (ctor_elts
, k
);
3137 for (l
= 0; l
< k
; l
++)
3139 if (m
== 0 && l
== 0)
3141 = vect_get_vec_def_for_operand (op
, stmt
);
3144 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3146 arginfo
[i
].op
= vec_oprnd0
;
3149 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3153 vargs
.safe_push (vec_oprnd0
);
3156 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3158 = gimple_build_assign (make_ssa_name (atype
),
3160 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3161 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3166 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3167 vargs
.safe_push (op
);
3169 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3170 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3175 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3180 edge pe
= loop_preheader_edge (loop
);
3181 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3182 gcc_assert (!new_bb
);
3184 if (arginfo
[i
].simd_lane_linear
)
3186 vargs
.safe_push (arginfo
[i
].op
);
3189 tree phi_res
= copy_ssa_name (op
);
3190 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3191 set_vinfo_for_stmt (new_phi
,
3192 new_stmt_vec_info (new_phi
, loop_vinfo
));
3193 add_phi_arg (new_phi
, arginfo
[i
].op
,
3194 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3196 = POINTER_TYPE_P (TREE_TYPE (op
))
3197 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3198 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3199 ? sizetype
: TREE_TYPE (op
);
3201 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3203 tree tcst
= wide_int_to_tree (type
, cst
);
3204 tree phi_arg
= copy_ssa_name (op
);
3206 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3207 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3208 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3209 set_vinfo_for_stmt (new_stmt
,
3210 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3211 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3213 arginfo
[i
].op
= phi_res
;
3214 vargs
.safe_push (phi_res
);
3219 = POINTER_TYPE_P (TREE_TYPE (op
))
3220 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3221 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3222 ? sizetype
: TREE_TYPE (op
);
3224 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3226 tree tcst
= wide_int_to_tree (type
, cst
);
3227 new_temp
= make_ssa_name (TREE_TYPE (op
));
3228 new_stmt
= gimple_build_assign (new_temp
, code
,
3229 arginfo
[i
].op
, tcst
);
3230 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3231 vargs
.safe_push (new_temp
);
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3239 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3245 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3248 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3250 new_temp
= create_tmp_var (ratype
);
3251 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3252 == TYPE_VECTOR_SUBPARTS (rtype
))
3253 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3255 new_temp
= make_ssa_name (rtype
, new_stmt
);
3256 gimple_call_set_lhs (new_stmt
, new_temp
);
3258 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3262 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3265 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3266 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3267 gcc_assert ((k
& (k
- 1)) == 0);
3268 for (l
= 0; l
< k
; l
++)
3273 t
= build_fold_addr_expr (new_temp
);
3274 t
= build2 (MEM_REF
, vectype
, t
,
3275 build_int_cst (TREE_TYPE (t
),
3276 l
* prec
/ BITS_PER_UNIT
));
3279 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3280 size_int (prec
), bitsize_int (l
* prec
));
3282 = gimple_build_assign (make_ssa_name (vectype
), t
);
3283 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3284 if (j
== 0 && l
== 0)
3285 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3287 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3289 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3294 tree clobber
= build_constructor (ratype
, NULL
);
3295 TREE_THIS_VOLATILE (clobber
) = 1;
3296 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3297 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3301 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3303 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3304 / TYPE_VECTOR_SUBPARTS (rtype
));
3305 gcc_assert ((k
& (k
- 1)) == 0);
3306 if ((j
& (k
- 1)) == 0)
3307 vec_alloc (ret_ctor_elts
, k
);
3310 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3311 for (m
= 0; m
< o
; m
++)
3313 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3314 size_int (m
), NULL_TREE
, NULL_TREE
);
3316 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3317 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3318 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3319 gimple_assign_lhs (new_stmt
));
3321 tree clobber
= build_constructor (ratype
, NULL
);
3322 TREE_THIS_VOLATILE (clobber
) = 1;
3323 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3324 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3327 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3328 if ((j
& (k
- 1)) != k
- 1)
3330 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3332 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3333 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3335 if ((unsigned) j
== k
- 1)
3336 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3338 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3340 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3345 tree t
= build_fold_addr_expr (new_temp
);
3346 t
= build2 (MEM_REF
, vectype
, t
,
3347 build_int_cst (TREE_TYPE (t
), 0));
3349 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3350 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3351 tree clobber
= build_constructor (ratype
, NULL
);
3352 TREE_THIS_VOLATILE (clobber
) = 1;
3353 vect_finish_stmt_generation (stmt
,
3354 gimple_build_assign (new_temp
,
3360 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3362 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3364 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3369 /* The call in STMT might prevent it from being removed in dce.
3370 We however cannot remove it here, due to the way the ssa name
3371 it defines is mapped to the new definition. So just replace
3372 rhs of the statement with something harmless. */
3379 type
= TREE_TYPE (scalar_dest
);
3380 if (is_pattern_stmt_p (stmt_info
))
3381 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3383 lhs
= gimple_call_lhs (stmt
);
3384 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3387 new_stmt
= gimple_build_nop ();
3388 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3389 set_vinfo_for_stmt (stmt
, NULL
);
3390 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3391 gsi_replace (gsi
, new_stmt
, true);
3392 unlink_stmt_vdef (stmt
);
3398 /* Function vect_gen_widened_results_half
3400 Create a vector stmt whose code, type, number of arguments, and result
3401 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3402 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3403 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3404 needs to be created (DECL is a function-decl of a target-builtin).
3405 STMT is the original scalar stmt that we are vectorizing. */
3408 vect_gen_widened_results_half (enum tree_code code
,
3410 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3411 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3417 /* Generate half of the widened result: */
3418 if (code
== CALL_EXPR
)
3420 /* Target specific support */
3421 if (op_type
== binary_op
)
3422 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3424 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3425 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3426 gimple_call_set_lhs (new_stmt
, new_temp
);
3430 /* Generic support */
3431 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3432 if (op_type
!= binary_op
)
3434 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3435 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3436 gimple_assign_set_lhs (new_stmt
, new_temp
);
3438 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3444 /* Get vectorized definitions for loop-based vectorization. For the first
3445 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3446 scalar operand), and for the rest we get a copy with
3447 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3448 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3449 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): extraction dropped interior lines (locals, braces and
   the updates of *OPRND between the pushes); the visible recursion
   bottom (multi_step_cvt == 0 test) is also missing — confirm against
   the original source.  */
3452 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3453 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3457 /* Get first vector operand. */
3458 /* All the vector operands except the very first one (that is scalar oprnd)
/* First invocation: *OPRND is still the scalar operand, so obtain its
   initial vector def; on recursive calls it is already a vector and we
   take a copy-def instead.  */
3460 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3461 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3463 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3465 vec_oprnds
->quick_push (vec_oprnd
);
3467 /* Get second vector operand. */
3468 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3469 vec_oprnds
->quick_push (vec_oprnd
);
3473 /* For conversion in multiple steps, continue to get operands
/* Recurse with one fewer conversion step; each level doubles the number
   of collected defs.  */
3476 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3480 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3481 For multi-step conversions store the resulting vectors and call the function
/* Recursively narrow (demote) the vectors in VEC_OPRNDS toward the
   destination type, popping one destination per level from VEC_DSTS.
   NOTE(review): line-split extraction; locals, braces and the SLP/loop
   branch structure are partially missing from view.  */
3485 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3486 int multi_step_cvt
, gimple
*stmt
,
3488 gimple_stmt_iterator
*gsi
,
3489 slp_tree slp_node
, enum tree_code code
,
3490 stmt_vec_info
*prev_stmt_info
)
3493 tree vop0
, vop1
, new_tmp
, vec_dest
;
3495 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Destination for this level of the demotion sequence.  */
3497 vec_dest
= vec_dsts
.pop ();
/* Combine adjacent pairs of operands: each demotion stmt consumes two
   source vectors and produces one narrower vector.  */
3499 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3501 /* Create demotion operation. */
3502 vop0
= (*vec_oprnds
)[i
];
3503 vop1
= (*vec_oprnds
)[i
+ 1];
3504 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3505 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3506 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3507 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3510 /* Store the resulting vector for next recursive call. */
3511 (*vec_oprnds
)[i
/2] = new_tmp
;
3514 /* This is the last step of the conversion sequence. Store the
3515 vectors in SLP_NODE or in vector info of the scalar statement
3516 (or in STMT_VINFO_RELATED_STMT chain). */
3518 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Loop-based: chain the result into the scalar stmt's vec-stmt list.  */
3521 if (!*prev_stmt_info
)
3522 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3524 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3526 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3531 /* For multi-step demotion operations we first generate demotion operations
3532 from the source type to the intermediate types, and then combine the
3533 results (stored in VEC_OPRNDS) in demotion operation to the destination
3537 /* At each level of recursion we have half of the operands we had at the
/* Keep only the (i+1)/2 vectors produced above and recurse one level
   deeper with VEC_PACK_TRUNC_EXPR.  */
3539 vec_oprnds
->truncate ((i
+1)/2);
3540 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3541 stmt
, vec_dsts
, gsi
, slp_node
,
3542 VEC_PACK_TRUNC_EXPR
,
/* Restore the popped destination for the caller.  */
3546 vec_dsts
.quick_push (vec_dest
);
3550 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3551 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3552 the resulting vectors and call the function recursively. */
/* Each source vector is widened into two result vectors (lo/hi halves)
   via CODE1/CODE2 (or target builtins DECL1/DECL2 when the codes are
   CALL_EXPRs); results replace the contents of *VEC_OPRNDS0.  */
3555 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3556 vec
<tree
> *vec_oprnds1
,
3557 gimple
*stmt
, tree vec_dest
,
3558 gimple_stmt_iterator
*gsi
,
3559 enum tree_code code1
,
3560 enum tree_code code2
, tree decl1
,
3561 tree decl2
, int op_type
)
3564 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3565 gimple
*new_stmt1
, *new_stmt2
;
3566 vec
<tree
> vec_tmp
= vNULL
;
/* Twice as many results as inputs: one low half and one high half per
   input vector.  */
3568 vec_tmp
.create (vec_oprnds0
->length () * 2);
3569 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3571 if (op_type
== binary_op
)
3572 vop1
= (*vec_oprnds1
)[i
];
3576 /* Generate the two halves of promotion operation. */
3577 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3578 op_type
, vec_dest
, gsi
, stmt
);
3579 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3580 op_type
, vec_dest
, gsi
, stmt
);
/* The generated halves may be calls (target builtins) or assigns;
   fetch the produced SSA names accordingly.  */
3581 if (is_gimple_call (new_stmt1
))
3583 new_tmp1
= gimple_call_lhs (new_stmt1
);
3584 new_tmp2
= gimple_call_lhs (new_stmt2
);
3588 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3589 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3592 /* Store the results for the next step. */
3593 vec_tmp
.quick_push (new_tmp1
);
3594 vec_tmp
.quick_push (new_tmp2
);
/* Hand the widened vectors back to the caller in *VEC_OPRNDS0.  */
3597 vec_oprnds0
->release ();
3598 *vec_oprnds0
= vec_tmp
;
3602 /* Check if STMT performs a conversion operation, that can be vectorized.
3603 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3605 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): line-split extraction; many interior lines (returns,
   braces, else-arms, some conditions) are missing from view.  The
   function analyses a conversion (CONVERT/FIX_TRUNC/FLOAT/WIDEN_MULT/
   WIDEN_LSHIFT) and classifies it as NONE, WIDEN or NARROW, then
   (when VEC_STMT is non-null) emits the vectorized form, possibly via
   intermediate types for multi-step conversions.  */
3608 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3609 gimple
**vec_stmt
, slp_tree slp_node
)
3613 tree op0
, op1
= NULL_TREE
;
3614 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3615 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3616 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3617 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3618 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3619 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3622 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3623 gimple
*new_stmt
= NULL
;
3624 stmt_vec_info prev_stmt_info
;
3627 tree vectype_out
, vectype_in
;
3629 tree lhs_type
, rhs_type
;
3630 enum { NARROW
, NONE
, WIDEN
} modifier
;
3631 vec
<tree
> vec_oprnds0
= vNULL
;
3632 vec
<tree
> vec_oprnds1
= vNULL
;
3634 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3635 vec_info
*vinfo
= stmt_info
->vinfo
;
3636 int multi_step_cvt
= 0;
3637 vec
<tree
> vec_dsts
= vNULL
;
3638 vec
<tree
> interm_types
= vNULL
;
3639 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3641 machine_mode rhs_mode
;
3642 unsigned short fltsz
;
3644 /* Is STMT a vectorizable conversion? */
/* Early-out checks: the stmt must be relevant, an internal def, a
   gimple assignment to an SSA name, and its rhs code one of the
   supported conversion codes.  */
3646 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3649 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3653 if (!is_gimple_assign (stmt
))
3656 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3659 code
= gimple_assign_rhs_code (stmt
);
3660 if (!CONVERT_EXPR_CODE_P (code
)
3661 && code
!= FIX_TRUNC_EXPR
3662 && code
!= FLOAT_EXPR
3663 && code
!= WIDEN_MULT_EXPR
3664 && code
!= WIDEN_LSHIFT_EXPR
)
3667 op_type
= TREE_CODE_LENGTH (code
);
3669 /* Check types of lhs and rhs. */
3670 scalar_dest
= gimple_assign_lhs (stmt
);
3671 lhs_type
= TREE_TYPE (scalar_dest
);
3672 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3674 op0
= gimple_assign_rhs1 (stmt
);
3675 rhs_type
= TREE_TYPE (op0
);
/* Only int<->int and float<->float conversions are handled here, except
   for the explicit FIX_TRUNC/FLOAT codes.  */
3677 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3678 && !((INTEGRAL_TYPE_P (lhs_type
)
3679 && INTEGRAL_TYPE_P (rhs_type
))
3680 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3681 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
/* Bit-precision (sub-mode-width) integer types are not supported,
   except for boolean vectors.  */
3684 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3685 && ((INTEGRAL_TYPE_P (lhs_type
)
3686 && (TYPE_PRECISION (lhs_type
)
3687 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3688 || (INTEGRAL_TYPE_P (rhs_type
)
3689 && (TYPE_PRECISION (rhs_type
)
3690 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3694 "type conversion to/from bit-precision unsupported."
3699 /* Check the operands of the operation. */
3700 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3702 if (dump_enabled_p ())
3703 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3704 "use not simple.\n");
3707 if (op_type
== binary_op
)
/* Binary widening ops (WIDEN_MULT/WIDEN_LSHIFT) also need op1 to be a
   simple use; if op0 is constant, derive vectype_in from op1.  */
3711 op1
= gimple_assign_rhs2 (stmt
);
3712 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3713 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3715 if (CONSTANT_CLASS_P (op0
))
3716 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
3718 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3722 if (dump_enabled_p ())
3723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3724 "use not simple.\n");
3729 /* If op0 is an external or constant defs use a vector type of
3730 the same size as the output vector type. */
3732 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3734 gcc_assert (vectype_in
);
3737 if (dump_enabled_p ())
3739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3740 "no vectype for scalar type ");
3741 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3742 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Boolean output requires boolean input vectors; mixed conversions are
   rejected.  */
3748 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3749 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3751 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3754 "can't convert between boolean and non "
3756 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3757 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify the conversion by comparing element counts: fewer input
   lanes than output => NARROW path feeds it; equal => NONE; more =>
   WIDEN (assignment lines to MODIFIER are missing from view).  */
3763 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3764 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3765 if (nunits_in
< nunits_out
)
3767 else if (nunits_out
== nunits_in
)
3772 /* Multiple types in SLP are handled by creating the appropriate number of
3773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3775 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3777 else if (modifier
== NARROW
)
3778 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3780 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3782 /* Sanity check: make sure that at least one copy of the vectorized stmt
3783 needs to be generated. */
3784 gcc_assert (ncopies
>= 1);
3786 /* Supportable by target? */
/* Direct conversion case (modifier NONE, presumably — the switch/case
   structure here is partially missing from view).  */
3790 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3792 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3797 if (dump_enabled_p ())
3798 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3799 "conversion not supported by target.\n");
/* WIDEN case: try a direct widening operation first...  */
3803 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3804 &code1
, &code2
, &multi_step_cvt
,
3807 /* Binary widening operation can only be supported directly by the
3809 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
))
;
/* ...otherwise, for int->float widening, search for a wider integer
   mode CVT_TYPE through which to split the conversion.  */
3813 if (code
!= FLOAT_EXPR
3814 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3815 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3818 rhs_mode
= TYPE_MODE (rhs_type
);
3819 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3820 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3821 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3822 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3825 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3826 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3827 if (cvt_type
== NULL_TREE
)
3830 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3832 if (!supportable_convert_operation (code
, vectype_out
,
3833 cvt_type
, &decl1
, &codecvt1
))
3836 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3837 cvt_type
, &codecvt1
,
3838 &codecvt2
, &multi_step_cvt
,
3842 gcc_assert (multi_step_cvt
== 0);
3844 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3845 vectype_in
, &code1
, &code2
,
3846 &multi_step_cvt
, &interm_types
))
3850 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3853 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3854 codecvt2
= ERROR_MARK
;
3858 interm_types
.safe_push (cvt_type
);
3859 cvt_type
= NULL_TREE
;
/* NARROW case: only unary codes reach here.  */
3864 gcc_assert (op_type
== unary_op
);
3865 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3866 &code1
, &multi_step_cvt
,
/* Float->int narrowing may go through an intermediate integer type of
   the rhs width, then narrow with NOP_EXPR.  */
3870 if (code
!= FIX_TRUNC_EXPR
3871 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3872 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3875 rhs_mode
= TYPE_MODE (rhs_type
);
3877 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3878 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3879 if (cvt_type
== NULL_TREE
)
3881 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3884 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3885 &code1
, &multi_step_cvt
,
/* Analysis only: record the stmt type and its cost model entry, then
   return without transforming.  */
3894 if (!vec_stmt
) /* transformation not required. */
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_NOTE
, vect_location
,
3898 "=== vectorizable_conversion ===\n");
3899 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3901 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3902 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3904 else if (modifier
== NARROW
)
3906 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3907 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3911 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3912 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3914 interm_types
.release ();
/* Transformation.  */
3919 if (dump_enabled_p ())
3920 dump_printf_loc (MSG_NOTE
, vect_location
,
3921 "transform conversion. ncopies = %d.\n", ncopies
);
3923 if (op_type
== binary_op
)
3925 if (CONSTANT_CLASS_P (op0
))
3926 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3927 else if (CONSTANT_CLASS_P (op1
))
3928 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3931 /* In case of multi-step conversion, we first generate conversion operations
3932 to the intermediate types, and then from that types to the final one.
3933 We create vector destinations for the intermediate type (TYPES) received
3934 from supportable_*_operation, and store them in the correct order
3935 for future use in vect_create_vectorized_*_stmts (). */
3936 vec_dsts
.create (multi_step_cvt
+ 1);
3937 vec_dest
= vect_create_destination_var (scalar_dest
,
3938 (cvt_type
&& modifier
== WIDEN
)
3939 ? cvt_type
: vectype_out
);
3940 vec_dsts
.quick_push (vec_dest
);
3944 for (i
= interm_types
.length () - 1;
3945 interm_types
.iterate (i
, &intermediate_type
); i
--)
3947 vec_dest
= vect_create_destination_var (scalar_dest
,
3949 vec_dsts
.quick_push (vec_dest
);
3954 vec_dest
= vect_create_destination_var (scalar_dest
,
3956 ? vectype_out
: cvt_type
);
/* Pre-size the operand vectors per modifier.  */
3960 if (modifier
== WIDEN
)
3962 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3963 if (op_type
== binary_op
)
3964 vec_oprnds1
.create (1);
3966 else if (modifier
== NARROW
)
3967 vec_oprnds0
.create (
3968 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3970 else if (code
== WIDEN_LSHIFT_EXPR
)
3971 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3974 prev_stmt_info
= NULL
;
/* Modifier NONE: emit NCOPIES direct-conversion stmts.  */
3978 for (j
= 0; j
< ncopies
; j
++)
3981 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3984 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3986 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3988 /* Arguments are ready, create the new vector stmt. */
3989 if (code1
== CALL_EXPR
)
3991 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3992 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3993 gimple_call_set_lhs (new_stmt
, new_temp
);
3997 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3998 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3999 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4000 gimple_assign_set_lhs (new_stmt
, new_temp
);
4003 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4005 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4008 if (!prev_stmt_info
)
4009 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4011 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4012 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Modifier WIDEN.  */
4019 /* In case the vectorization factor (VF) is bigger than the number
4020 of elements that we can fit in a vectype (nunits), we have to
4021 generate more than one vector stmt - i.e - we need to "unroll"
4022 the vector stmt by a factor VF/nunits. */
4023 for (j
= 0; j
< ncopies
; j
++)
4030 if (code
== WIDEN_LSHIFT_EXPR
)
4035 /* Store vec_oprnd1 for every vector stmt to be created
4036 for SLP_NODE. We check during the analysis that all
4037 the shift arguments are the same. */
4038 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4039 vec_oprnds1
.quick_push (vec_oprnd1
);
4041 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4045 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4046 &vec_oprnds1
, slp_node
, -1);
4050 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4051 vec_oprnds0
.quick_push (vec_oprnd0
);
4052 if (op_type
== binary_op
)
4054 if (code
== WIDEN_LSHIFT_EXPR
)
4057 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4058 vec_oprnds1
.quick_push (vec_oprnd1
);
/* j > 0: continue from the previous copies' defs.  */
4064 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4065 vec_oprnds0
.truncate (0);
4066 vec_oprnds0
.quick_push (vec_oprnd0
);
4067 if (op_type
== binary_op
)
4069 if (code
== WIDEN_LSHIFT_EXPR
)
4072 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4074 vec_oprnds1
.truncate (0);
4075 vec_oprnds1
.quick_push (vec_oprnd1
);
4079 /* Arguments are ready. Create the new vector stmts. */
4080 for (i
= multi_step_cvt
; i
>= 0; i
--)
4082 tree this_dest
= vec_dsts
[i
];
4083 enum tree_code c1
= code1
, c2
= code2
;
4084 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4089 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4091 stmt
, this_dest
, gsi
,
4092 c1
, c2
, decl1
, decl2
,
/* Optional final convert step (codecvt1) after the widening.  */
4096 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4100 if (codecvt1
== CALL_EXPR
)
4102 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4103 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4104 gimple_call_set_lhs (new_stmt
, new_temp
);
4108 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4109 new_temp
= make_ssa_name (vec_dest
);
4110 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4114 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4117 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4120 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4123 if (!prev_stmt_info
)
4124 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4126 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4127 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4132 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Modifier NARROW.  */
4136 /* In case the vectorization factor (VF) is bigger than the number
4137 of elements that we can fit in a vectype (nunits), we have to
4138 generate more than one vector stmt - i.e - we need to "unroll"
4139 the vector stmt by a factor VF/nunits. */
4140 for (j
= 0; j
< ncopies
; j
++)
4144 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4148 vec_oprnds0
.truncate (0);
4149 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4150 vect_pow2 (multi_step_cvt
) - 1);
4153 /* Arguments are ready. Create the new vector stmts. */
/* Optional pre-convert step (codecvt1) before the demotion.  */
4155 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4157 if (codecvt1
== CALL_EXPR
)
4159 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4160 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4161 gimple_call_set_lhs (new_stmt
, new_temp
);
4165 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4166 new_temp
= make_ssa_name (vec_dest
);
4167 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4171 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4172 vec_oprnds0
[i
] = new_temp
;
4175 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4176 stmt
, vec_dsts
, gsi
,
4181 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Release all scratch vectors.  */
4185 vec_oprnds0
.release ();
4186 vec_oprnds1
.release ();
4187 vec_dsts
.release ();
4188 interm_types
.release ();
4194 /* Function vectorizable_assignment.
4196 Check if STMT performs an assignment (copy) that can be vectorized.
4197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): line-split extraction; returns, braces and some else
   arms are missing from view.  Handles plain copies plus mode-preserving
   NOP/VIEW_CONVERT conversions by emitting VIEW_CONVERT_EXPR copies.  */
4202 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4203 gimple
**vec_stmt
, slp_tree slp_node
)
4208 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4209 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4212 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4215 vec
<tree
> vec_oprnds
= vNULL
;
4217 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4218 vec_info
*vinfo
= stmt_info
->vinfo
;
4219 gimple
*new_stmt
= NULL
;
4220 stmt_vec_info prev_stmt_info
= NULL
;
4221 enum tree_code code
;
/* Early-out checks: relevance, internal def, gimple assign to SSA name,
   and an rhs that is a single operand, PAREN_EXPR or a conversion.  */
4224 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4227 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4231 /* Is vectorizable assignment? */
4232 if (!is_gimple_assign (stmt
))
4235 scalar_dest
= gimple_assign_lhs (stmt
);
4236 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4239 code
= gimple_assign_rhs_code (stmt
);
4240 if (gimple_assign_single_p (stmt
)
4241 || code
== PAREN_EXPR
4242 || CONVERT_EXPR_CODE_P (code
))
4243 op
= gimple_assign_rhs1 (stmt
);
4247 if (code
== VIEW_CONVERT_EXPR
)
4248 op
= TREE_OPERAND (op
, 0);
4250 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4251 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4253 /* Multiple types in SLP are handled by creating the appropriate number of
4254 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4256 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4259 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4261 gcc_assert (ncopies
>= 1);
4263 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4265 if (dump_enabled_p ())
4266 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4267 "use not simple.\n");
4271 /* We can handle NOP_EXPR conversions that do not change the number
4272 of elements or the vector size. */
4273 if ((CONVERT_EXPR_CODE_P (code
)
4274 || code
== VIEW_CONVERT_EXPR
)
4276 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4277 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4278 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4281 /* We do not handle bit-precision changes. */
/* Reject conversions whose scalar types have sub-mode precision,
   unless the bit-pattern is provably unchanged (unsigned widening) or
   both vectypes are boolean.  */
4282 if ((CONVERT_EXPR_CODE_P (code
)
4283 || code
== VIEW_CONVERT_EXPR
)
4284 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4285 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4286 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4287 || ((TYPE_PRECISION (TREE_TYPE (op
))
4288 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4289 /* But a conversion that does not change the bit-pattern is ok. */
4290 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4291 > TYPE_PRECISION (TREE_TYPE (op
)))
4292 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4293 /* Conversion between boolean types of different sizes is
4294 a simple assignment in case their vectypes are same
4296 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4297 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4299 if (dump_enabled_p ())
4300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4301 "type conversion to/from bit-precision "
/* Analysis only: record type and cost, no code generation.  */
4306 if (!vec_stmt
) /* transformation not required. */
4308 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4309 if (dump_enabled_p ())
4310 dump_printf_loc (MSG_NOTE
, vect_location
,
4311 "=== vectorizable_assignment ===\n");
4312 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation.  */
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4321 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4324 for (j
= 0; j
< ncopies
; j
++)
4328 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4330 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4332 /* Arguments are ready. create the new vector stmt. */
4333 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* Conversions become VIEW_CONVERT_EXPRs on the vector operand — a
   pure reinterpretation, valid because mode size and lane count were
   checked above.  */
4335 if (CONVERT_EXPR_CODE_P (code
)
4336 || code
== VIEW_CONVERT_EXPR
)
4337 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4338 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4339 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4340 gimple_assign_set_lhs (new_stmt
, new_temp
);
4341 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4343 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain generated stmts via STMT_VINFO_RELATED_STMT.  */
4350 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4352 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4354 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4357 vec_oprnds
.release ();
4362 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4363 either as shift by a scalar or by a vector. */
/* NOTE(review): line-split extraction; the returns and the final
   TRUE/FALSE paths are missing from view.  The visible logic probes the
   scalar-shift optab first, falls back to the vector-shift optab, and
   finally checks the insn is actually implemented for the mode.  */
4366 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4369 machine_mode vec_mode
;
4374 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Prefer vector-by-scalar shifts...  */
4378 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4380 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* ...else try vector-by-vector shifts.  */
4382 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4384 || (optab_handler (optab
, TYPE_MODE (vectype
))
4385 == CODE_FOR_nothing
))
/* Final check: a real insn must exist for the vector mode.  */
4389 vec_mode
= TYPE_MODE (vectype
);
4390 icode
= (int) optab_handler (optab
, vec_mode
);
4391 if (icode
== CODE_FOR_nothing
)
4398 /* Function vectorizable_shift.
4400 Check if STMT performs a shift operation that can be vectorized.
4401 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4402 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4403 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4406 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4407 gimple
**vec_stmt
, slp_tree slp_node
)
4411 tree op0
, op1
= NULL
;
4412 tree vec_oprnd1
= NULL_TREE
;
4413 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4415 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4416 enum tree_code code
;
4417 machine_mode vec_mode
;
4421 machine_mode optab_op2_mode
;
4423 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4424 gimple
*new_stmt
= NULL
;
4425 stmt_vec_info prev_stmt_info
;
4432 vec
<tree
> vec_oprnds0
= vNULL
;
4433 vec
<tree
> vec_oprnds1
= vNULL
;
4436 bool scalar_shift_arg
= true;
4437 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4438 vec_info
*vinfo
= stmt_info
->vinfo
;
4441 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4444 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4448 /* Is STMT a vectorizable binary/unary operation? */
4449 if (!is_gimple_assign (stmt
))
4452 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4455 code
= gimple_assign_rhs_code (stmt
);
4457 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4458 || code
== RROTATE_EXPR
))
4461 scalar_dest
= gimple_assign_lhs (stmt
);
4462 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4463 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4464 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4466 if (dump_enabled_p ())
4467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4468 "bit-precision shifts not supported.\n");
4472 op0
= gimple_assign_rhs1 (stmt
);
4473 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4477 "use not simple.\n");
4480 /* If op0 is an external or constant def use a vector type with
4481 the same size as the output vector type. */
4483 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4485 gcc_assert (vectype
);
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4490 "no vectype for scalar type\n");
4494 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4495 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4496 if (nunits_out
!= nunits_in
)
4499 op1
= gimple_assign_rhs2 (stmt
);
4500 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4502 if (dump_enabled_p ())
4503 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4504 "use not simple.\n");
4509 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4513 /* Multiple types in SLP are handled by creating the appropriate number of
4514 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4516 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4519 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4521 gcc_assert (ncopies
>= 1);
4523 /* Determine whether the shift amount is a vector, or scalar. If the
4524 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4526 if ((dt
[1] == vect_internal_def
4527 || dt
[1] == vect_induction_def
)
4529 scalar_shift_arg
= false;
4530 else if (dt
[1] == vect_constant_def
4531 || dt
[1] == vect_external_def
4532 || dt
[1] == vect_internal_def
)
4534 /* In SLP, need to check whether the shift count is the same,
4535 in loops if it is a constant or invariant, it is always
4539 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4542 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4543 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4544 scalar_shift_arg
= false;
4547 /* If the shift amount is computed by a pattern stmt we cannot
4548 use the scalar amount directly thus give up and use a vector
4550 if (dt
[1] == vect_internal_def
)
4552 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4553 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4554 scalar_shift_arg
= false;
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4561 "operand mode requires invariant argument.\n");
4565 /* Vector shifted by vector. */
4566 if (!scalar_shift_arg
)
4568 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4569 if (dump_enabled_p ())
4570 dump_printf_loc (MSG_NOTE
, vect_location
,
4571 "vector/vector shift/rotate found.\n");
4574 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4575 if (op1_vectype
== NULL_TREE
4576 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4580 "unusable type for last operand in"
4581 " vector/vector shift/rotate.\n");
4585 /* See if the machine has a vector shifted by scalar insn and if not
4586 then see if it has a vector shifted by vector insn. */
4589 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4591 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4593 if (dump_enabled_p ())
4594 dump_printf_loc (MSG_NOTE
, vect_location
,
4595 "vector/scalar shift/rotate found.\n");
4599 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4601 && (optab_handler (optab
, TYPE_MODE (vectype
))
4602 != CODE_FOR_nothing
))
4604 scalar_shift_arg
= false;
4606 if (dump_enabled_p ())
4607 dump_printf_loc (MSG_NOTE
, vect_location
,
4608 "vector/vector shift/rotate found.\n");
4610 /* Unlike the other binary operators, shifts/rotates have
4611 the rhs being int, instead of the same type as the lhs,
4612 so make sure the scalar is the right type if we are
4613 dealing with vectors of long long/long/short/char. */
4614 if (dt
[1] == vect_constant_def
)
4615 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4616 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4620 && TYPE_MODE (TREE_TYPE (vectype
))
4621 != TYPE_MODE (TREE_TYPE (op1
)))
4623 if (dump_enabled_p ())
4624 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4625 "unusable type for last operand in"
4626 " vector/vector shift/rotate.\n");
4629 if (vec_stmt
&& !slp_node
)
4631 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4632 op1
= vect_init_vector (stmt
, op1
,
4633 TREE_TYPE (vectype
), NULL
);
4640 /* Supportable by target? */
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4648 vec_mode
= TYPE_MODE (vectype
);
4649 icode
= (int) optab_handler (optab
, vec_mode
);
4650 if (icode
== CODE_FOR_nothing
)
4652 if (dump_enabled_p ())
4653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4654 "op not supported by target.\n");
4655 /* Check only during analysis. */
4656 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4657 || (vf
< vect_min_worthwhile_factor (code
)
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_NOTE
, vect_location
,
4662 "proceeding using word mode.\n");
4665 /* Worthwhile without SIMD support? Check only during analysis. */
4666 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4667 && vf
< vect_min_worthwhile_factor (code
)
4670 if (dump_enabled_p ())
4671 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4672 "not worthwhile without SIMD support.\n");
4676 if (!vec_stmt
) /* transformation not required. */
4678 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE
, vect_location
,
4681 "=== vectorizable_shift ===\n");
4682 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_NOTE
, vect_location
,
4690 "transform binary/unary operation.\n");
4693 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4695 prev_stmt_info
= NULL
;
4696 for (j
= 0; j
< ncopies
; j
++)
4701 if (scalar_shift_arg
)
4703 /* Vector shl and shr insn patterns can be defined with scalar
4704 operand 2 (shift operand). In this case, use constant or loop
4705 invariant op1 directly, without extending it to vector mode
4707 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4708 if (!VECTOR_MODE_P (optab_op2_mode
))
4710 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_NOTE
, vect_location
,
4712 "operand 1 using scalar mode.\n");
4714 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4715 vec_oprnds1
.quick_push (vec_oprnd1
);
4718 /* Store vec_oprnd1 for every vector stmt to be created
4719 for SLP_NODE. We check during the analysis that all
4720 the shift arguments are the same.
4721 TODO: Allow different constants for different vector
4722 stmts generated for an SLP instance. */
4723 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4724 vec_oprnds1
.quick_push (vec_oprnd1
);
4729 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4730 (a special case for certain kind of vector shifts); otherwise,
4731 operand 1 should be of a vector type (the usual case). */
4733 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4736 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4740 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4742 /* Arguments are ready. Create the new vector stmt. */
4743 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4745 vop1
= vec_oprnds1
[i
];
4746 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4747 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4748 gimple_assign_set_lhs (new_stmt
, new_temp
);
4749 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4751 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4758 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4760 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4761 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4764 vec_oprnds0
.release ();
4765 vec_oprnds1
.release ();
4771 /* Function vectorizable_operation.
4773 Check if STMT performs a binary, unary or ternary operation that can
4775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this text is a mangled extraction of the function: each
   original source line is split across several lines and the embedded
   original line numbers jump (e.g. 4777 -> 4780, 4913 -> 4916), so whole
   source lines -- braces, return statements, parts of conditions -- appear
   to have been dropped.  The comments added below describe only what the
   surviving fragments show; compare against the upstream file before
   relying on any of this text.  */
4780 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4781 gimple
**vec_stmt
, slp_tree slp_node
)
/* Local state: scalar operands op0/op1/op2, their definition kinds in
   dt[3], and the vec<tree> operand vectors reused across unrolled copies.  */
4785 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4786 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4788 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4789 enum tree_code code
;
4790 machine_mode vec_mode
;
4794 bool target_support_p
;
4796 enum vect_def_type dt
[3]
4797 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4798 gimple
*new_stmt
= NULL
;
4799 stmt_vec_info prev_stmt_info
;
4805 vec
<tree
> vec_oprnds0
= vNULL
;
4806 vec
<tree
> vec_oprnds1
= vNULL
;
4807 vec
<tree
> vec_oprnds2
= vNULL
;
4808 tree vop0
, vop1
, vop2
;
4809 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4810 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Early-out guards: the stmt must be relevant (or we are doing
   basic-block SLP) and an internal-def single assignment with an SSA lhs.
   The guard bodies (presumably "return false;") are among the dropped
   lines.  */
4813 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4816 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4820 /* Is STMT a vectorizable binary/unary operation? */
4821 if (!is_gimple_assign (stmt
))
4824 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4827 code
= gimple_assign_rhs_code (stmt
);
4829 /* For pointer addition, we should use the normal plus for
4830 the vector addition. */
4831 if (code
== POINTER_PLUS_EXPR
)
4834 /* Support only unary or binary operations. */
4835 op_type
= TREE_CODE_LENGTH (code
);
4836 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4838 if (dump_enabled_p ())
4839 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4840 "num. args = %d (not unary/binary/ternary op).\n",
4845 scalar_dest
= gimple_assign_lhs (stmt
);
4846 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4848 /* Most operations cannot handle bit-precision types without extra
4850 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4851 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4852 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4853 /* Exception are bitwise binary operations. */
4854 && code
!= BIT_IOR_EXPR
4855 && code
!= BIT_XOR_EXPR
4856 && code
!= BIT_AND_EXPR
)
4858 if (dump_enabled_p ())
4859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4860 "bit-precision arithmetic not supported.\n");
/* Operand 0 must have a simple (analyzable) definition; vect_is_simple_use
   also reports its vector type into VECTYPE.  */
4864 op0
= gimple_assign_rhs1 (stmt
);
4865 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4869 "use not simple.\n");
4872 /* If op0 is an external or constant def use a vector type with
4873 the same size as the output vector type. */
4876 /* For boolean type we cannot determine vectype by
4877 invariant value (don't know whether it is a vector
4878 of booleans or vector of integers). We use output
4879 vectype because operations on boolean don't change
4881 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
4883 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
4885 if (dump_enabled_p ())
4886 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4887 "not supported operation on bool value.\n");
4890 vectype
= vectype_out
;
4893 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4896 gcc_assert (vectype
);
4899 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4902 "no vectype for scalar type ");
4903 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4905 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Require matching element counts between the input and output vector
   types; the action taken on mismatch is among the dropped lines
   (presumably "return false;").  */
4911 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4912 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4913 if (nunits_out
!= nunits_in
)
/* Binary/ternary ops: operands 2 and 3 must also have simple defs,
   recorded in dt[1] and dt[2].  */
4916 if (op_type
== binary_op
|| op_type
== ternary_op
)
4918 op1
= gimple_assign_rhs2 (stmt
);
4919 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
4921 if (dump_enabled_p ())
4922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4923 "use not simple.\n");
4927 if (op_type
== ternary_op
)
4929 op2
= gimple_assign_rhs3 (stmt
);
4930 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4934 "use not simple.\n");
4940 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4944 /* Multiple types in SLP are handled by creating the appropriate number of
4945 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4947 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4950 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4952 gcc_assert (ncopies
>= 1);
4954 /* Shifts are handled in vectorizable_shift (). */
4955 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4956 || code
== RROTATE_EXPR
)
4959 /* Supportable by target? */
/* MULT_HIGHPART_EXPR is queried via can_mult_highpart_p; every other code
   goes through the tree-code optab and optab_handler.  */
4961 vec_mode
= TYPE_MODE (vectype
);
4962 if (code
== MULT_HIGHPART_EXPR
)
4963 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
4966 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4969 if (dump_enabled_p ())
4970 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4974 target_support_p
= (optab_handler (optab
, vec_mode
)
4975 != CODE_FOR_nothing
);
/* If the target has no instruction for this operation, fall back to
   word-mode emulation only when the vector is exactly one word wide and
   (at analysis time) the VF meets the minimum worthwhile factor.  */
4978 if (!target_support_p
)
4980 if (dump_enabled_p ())
4981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4982 "op not supported by target.\n");
4983 /* Check only during analysis. */
4984 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4985 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4987 if (dump_enabled_p ())
4988 dump_printf_loc (MSG_NOTE
, vect_location
,
4989 "proceeding using word mode.\n");
4992 /* Worthwhile without SIMD support? Check only during analysis. */
4993 if (!VECTOR_MODE_P (vec_mode
)
4995 && vf
< vect_min_worthwhile_factor (code
))
4997 if (dump_enabled_p ())
4998 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4999 "not worthwhile without SIMD support.\n");
/* Analysis phase: record the stmt kind and its scalar/vector cost model
   contribution, then (presumably, in dropped lines) return true without
   emitting any code.  */
5003 if (!vec_stmt
) /* transformation not required. */
5005 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE
, vect_location
,
5008 "=== vectorizable_operation ===\n");
5009 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transform phase begins here: create the destination variable and emit
   NCOPIES vectorized copies of the operation.  */
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_NOTE
, vect_location
,
5017 "transform binary/unary operation.\n");
5020 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5022 /* In case the vectorization factor (VF) is bigger than the number
5023 of elements that we can fit in a vectype (nunits), we have to generate
5024 more than one vector stmt - i.e - we need to "unroll" the
5025 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5026 from one copy of the vector stmt to the next, in the field
5027 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5028 stages to find the correct vector defs to be used when vectorizing
5029 stmts that use the defs of the current stmt. The example below
5030 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5031 we need to create 4 vectorized stmts):
5033 before vectorization:
5034 RELATED_STMT VEC_STMT
5038 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5040 RELATED_STMT VEC_STMT
5041 VS1_0: vx0 = memref0 VS1_1 -
5042 VS1_1: vx1 = memref1 VS1_2 -
5043 VS1_2: vx2 = memref2 VS1_3 -
5044 VS1_3: vx3 = memref3 - -
5045 S1: x = load - VS1_0
5048 step2: vectorize stmt S2 (done here):
5049 To vectorize stmt S2 we first need to find the relevant vector
5050 def for the first operand 'x'. This is, as usual, obtained from
5051 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5052 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5053 relevant vector def 'vx0'. Having found 'vx0' we can generate
5054 the vector stmt VS2_0, and as usual, record it in the
5055 STMT_VINFO_VEC_STMT of stmt S2.
5056 When creating the second copy (VS2_1), we obtain the relevant vector
5057 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5058 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5059 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5060 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5061 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5062 chain of stmts and pointers:
5063 RELATED_STMT VEC_STMT
5064 VS1_0: vx0 = memref0 VS1_1 -
5065 VS1_1: vx1 = memref1 VS1_2 -
5066 VS1_2: vx2 = memref2 VS1_3 -
5067 VS1_3: vx3 = memref3 - -
5068 S1: x = load - VS1_0
5069 VS2_0: vz0 = vx0 + v1 VS2_1 -
5070 VS2_1: vz1 = vx1 + v1 VS2_2 -
5071 VS2_2: vz2 = vx2 + v1 VS2_3 -
5072 VS2_3: vz3 = vx3 + v1 - -
5073 S2: z = x + 1 - VS2_0 */
/* First iteration (j == 0) fetches fresh vector defs for the operands;
   later iterations (j > 0, the dt-based calls below) chain to the defs of
   the previous copy.  The if/else structure around these calls is partly
   among the dropped lines.  */
5075 prev_stmt_info
= NULL
;
5076 for (j
= 0; j
< ncopies
; j
++)
5081 if (op_type
== binary_op
|| op_type
== ternary_op
)
5082 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5085 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5087 if (op_type
== ternary_op
)
5088 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5093 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5094 if (op_type
== ternary_op
)
5096 tree vec_oprnd
= vec_oprnds2
.pop ();
5097 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5102 /* Arguments are ready. Create the new vector stmt. */
5103 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5105 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5106 ? vec_oprnds1
[i
] : NULL_TREE
);
5107 vop2
= ((op_type
== ternary_op
)
5108 ? vec_oprnds2
[i
] : NULL_TREE
);
5109 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5110 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5111 gimple_assign_set_lhs (new_stmt
, new_temp
);
5112 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5114 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies: the first one is recorded in STMT_VINFO_VEC_STMT and
   *vec_stmt, subsequent ones through STMT_VINFO_RELATED_STMT.  The if/else
   selecting between these two is among the dropped lines.  */
5121 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5123 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5124 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the temporary operand vectors.  (The trailing "return true;"
   and closing brace are among the dropped lines.)  */
5127 vec_oprnds0
.release ();
5128 vec_oprnds1
.release ();
5129 vec_oprnds2
.release ();
5134 /* A helper function to ensure data reference DR's base alignment
/* NOTE(review): mangled extraction -- statements are split across lines and
   the embedded line numbers jump (5134 -> 5138, 5149 -> 5152), so the
   function's return type, an early-exit guard, the "else" around the
   non-symtab case and the closing braces appear to have been dropped.  */
5138 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
/* If alignment analysis flagged the base object of DR as misaligned,
   raise the alignment of its declaration to the vector type's alignment.  */
5143 if (DR_VECT_AUX (dr
)->base_misaligned
)
5145 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5146 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
/* Declarations visible in the symbol table get their alignment raised via
   the symtab node; otherwise (presumably an "else" branch, dropped here)
   the DECL's alignment is set directly and marked user-specified so later
   passes do not lower it.  */
5148 if (decl_in_symtab_p (base_decl
))
5149 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5152 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
5153 DECL_USER_ALIGN (base_decl
) = 1;
/* Clear the flag so the alignment fix is applied only once per DR.  */
5155 DR_VECT_AUX (dr
)->base_misaligned
= false;
5160 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5161 reversal of the vector elements. If that is impossible to do,
/* NOTE(review): mangled extraction -- the function's return type, the local
   declarations (i, nunits, sel) and the failure-path return (presumably
   NULL_TREE, per the truncated header comment above) appear to have been
   dropped (line numbers jump 5161 -> 5165 and 5176 -> 5178).  */
5165 perm_mask_for_reverse (tree vectype
)
/* Build the element-reversing selector {nunits-1, nunits-2, ..., 0}.  */
5170 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5171 sel
= XALLOCAVEC (unsigned char, nunits
);
5173 for (i
= 0; i
< nunits
; ++i
)
5174 sel
[i
] = nunits
- 1 - i
;
/* Ask the target whether this permutation is supported (the `false'
   argument selects the non-variable query); the action taken when it is
   not supported is among the dropped lines.  */
5176 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5178 return vect_gen_perm_mask_checked (vectype
, sel
);
5181 /* Function get_group_alias_ptr_type.
5183 Return the alias type for the group starting at FIRST_STMT. */
/* NOTE(review): mangled extraction -- the function's return type, the
   next_stmt declaration and the loop header that walks the group via
   GROUP_NEXT_ELEMENT appear to have been dropped (line numbers jump
   5192 -> 5195 and 5204 -> 5206).  */
5186 get_group_alias_ptr_type (gimple
*first_stmt
)
5188 struct data_reference
*first_dr
, *next_dr
;
5191 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5192 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
/* Walk the remaining group members (the loop header is among the dropped
   lines): if any member's data reference is in a different alias set from
   the first, fall back to ptr_type_node, which conflicts with everything.  */
5195 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5196 if (get_alias_set (DR_REF (first_dr
))
5197 != get_alias_set (DR_REF (next_dr
)))
5199 if (dump_enabled_p ())
5200 dump_printf_loc (MSG_NOTE
, vect_location
,
5201 "conflicting alias set types.\n");
5202 return ptr_type_node
;
5204 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* All group members agree; use the alias pointer type of the first
   reference.  */
5206 return reference_alias_ptr_type (DR_REF (first_dr
));
5210 /* Function vectorizable_store.
5212 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5214 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5215 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5216 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5219 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5225 tree vec_oprnd
= NULL_TREE
;
5226 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5227 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5229 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5230 struct loop
*loop
= NULL
;
5231 machine_mode vec_mode
;
5233 enum dr_alignment_support alignment_support_scheme
;
5235 enum vect_def_type dt
;
5236 stmt_vec_info prev_stmt_info
= NULL
;
5237 tree dataref_ptr
= NULL_TREE
;
5238 tree dataref_offset
= NULL_TREE
;
5239 gimple
*ptr_incr
= NULL
;
5242 gimple
*next_stmt
, *first_stmt
= NULL
;
5243 bool grouped_store
= false;
5244 bool store_lanes_p
= false;
5245 unsigned int group_size
, i
;
5246 vec
<tree
> dr_chain
= vNULL
;
5247 vec
<tree
> oprnds
= vNULL
;
5248 vec
<tree
> result_chain
= vNULL
;
5250 bool negative
= false;
5251 tree offset
= NULL_TREE
;
5252 vec
<tree
> vec_oprnds
= vNULL
;
5253 bool slp
= (slp_node
!= NULL
);
5254 unsigned int vec_num
;
5255 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5256 vec_info
*vinfo
= stmt_info
->vinfo
;
5258 tree scatter_base
= NULL_TREE
, scatter_off
= NULL_TREE
;
5259 tree scatter_off_vectype
= NULL_TREE
, scatter_decl
= NULL_TREE
;
5260 int scatter_scale
= 1;
5261 enum vect_def_type scatter_idx_dt
= vect_unknown_def_type
;
5262 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5266 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5269 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5273 /* Is vectorizable store? */
5275 if (!is_gimple_assign (stmt
))
5278 scalar_dest
= gimple_assign_lhs (stmt
);
5279 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5280 && is_pattern_stmt_p (stmt_info
))
5281 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5282 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5283 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5284 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5285 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5286 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5287 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5288 && TREE_CODE (scalar_dest
) != MEM_REF
)
5291 gcc_assert (gimple_assign_single_p (stmt
));
5293 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5294 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5297 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5299 /* Multiple types in SLP are handled by creating the appropriate number of
5300 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5302 if (slp
|| PURE_SLP_STMT (stmt_info
))
5305 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5307 gcc_assert (ncopies
>= 1);
5309 /* FORNOW. This restriction should be relaxed. */
5310 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5312 if (dump_enabled_p ())
5313 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5314 "multiple types in nested loop.\n");
5318 op
= gimple_assign_rhs1 (stmt
);
5320 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5324 "use not simple.\n");
5328 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5331 elem_type
= TREE_TYPE (vectype
);
5332 vec_mode
= TYPE_MODE (vectype
);
5334 /* FORNOW. In some cases can vectorize even if data-type not supported
5335 (e.g. - array initialization with 0). */
5336 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5339 if (!STMT_VINFO_DATA_REF (stmt_info
))
5342 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5345 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5346 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5347 size_zero_node
) < 0;
5348 if (negative
&& ncopies
> 1)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5352 "multiple types with negative step.\n");
5357 gcc_assert (!grouped_store
);
5358 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5359 if (alignment_support_scheme
!= dr_aligned
5360 && alignment_support_scheme
!= dr_unaligned_supported
)
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5364 "negative step but alignment required.\n");
5367 if (dt
!= vect_constant_def
5368 && dt
!= vect_external_def
5369 && !perm_mask_for_reverse (vectype
))
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5373 "negative step and reversing not supported.\n");
5379 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5381 grouped_store
= true;
5382 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5383 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5385 && !PURE_SLP_STMT (stmt_info
)
5386 && !STMT_VINFO_STRIDED_P (stmt_info
))
5388 if (vect_store_lanes_supported (vectype
, group_size
))
5389 store_lanes_p
= true;
5390 else if (!vect_grouped_store_supported (vectype
, group_size
))
5394 if (STMT_VINFO_STRIDED_P (stmt_info
)
5395 && (slp
|| PURE_SLP_STMT (stmt_info
))
5396 && (group_size
> nunits
5397 || nunits
% group_size
!= 0))
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5400 "unhandled strided group store\n");
5404 if (first_stmt
== stmt
)
5406 /* STMT is the leader of the group. Check the operands of all the
5407 stmts of the group. */
5408 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5411 gcc_assert (gimple_assign_single_p (next_stmt
));
5412 op
= gimple_assign_rhs1 (next_stmt
);
5413 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
5415 if (dump_enabled_p ())
5416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5417 "use not simple.\n");
5420 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5428 scatter_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &scatter_base
,
5429 &scatter_off
, &scatter_scale
);
5430 gcc_assert (scatter_decl
);
5431 if (!vect_is_simple_use (scatter_off
, vinfo
, &def_stmt
, &scatter_idx_dt
,
5432 &scatter_off_vectype
))
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5436 "scatter index use not simple.");
5441 if (!vec_stmt
) /* transformation not required. */
5443 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5444 /* The SLP costs are calculated during SLP analysis. */
5445 if (!PURE_SLP_STMT (stmt_info
))
5446 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5453 ensure_base_align (stmt_info
, dr
);
5455 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5457 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5458 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (scatter_decl
));
5459 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5460 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5461 edge pe
= loop_preheader_edge (loop
);
5464 enum { NARROW
, NONE
, WIDEN
} modifier
;
5465 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (scatter_off_vectype
);
5467 if (nunits
== (unsigned int) scatter_off_nunits
)
5469 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5471 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5474 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5475 sel
[i
] = i
| nunits
;
5477 perm_mask
= vect_gen_perm_mask_checked (scatter_off_vectype
, sel
);
5478 gcc_assert (perm_mask
!= NULL_TREE
);
5480 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5482 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5485 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5486 sel
[i
] = i
| scatter_off_nunits
;
5488 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5489 gcc_assert (perm_mask
!= NULL_TREE
);
5495 rettype
= TREE_TYPE (TREE_TYPE (scatter_decl
));
5496 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5497 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5498 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5499 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5500 scaletype
= TREE_VALUE (arglist
);
5502 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5503 && TREE_CODE (rettype
) == VOID_TYPE
);
5505 ptr
= fold_convert (ptrtype
, scatter_base
);
5506 if (!is_gimple_min_invariant (ptr
))
5508 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5509 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5510 gcc_assert (!new_bb
);
5513 /* Currently we support only unconditional scatter stores,
5514 so mask should be all ones. */
5515 mask
= build_int_cst (masktype
, -1);
5516 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5518 scale
= build_int_cst (scaletype
, scatter_scale
);
5520 prev_stmt_info
= NULL
;
5521 for (j
= 0; j
< ncopies
; ++j
)
5526 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5528 = vect_get_vec_def_for_operand (scatter_off
, stmt
);
5530 else if (modifier
!= NONE
&& (j
& 1))
5532 if (modifier
== WIDEN
)
5535 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5536 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5539 else if (modifier
== NARROW
)
5541 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5544 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5552 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5554 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5557 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5559 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5560 == TYPE_VECTOR_SUBPARTS (srctype
));
5561 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5562 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5563 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5564 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5568 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5570 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5571 == TYPE_VECTOR_SUBPARTS (idxtype
));
5572 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5573 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5574 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5575 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5580 = gimple_build_call (scatter_decl
, 5, ptr
, mask
, op
, src
, scale
);
5582 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5584 if (prev_stmt_info
== NULL
)
5585 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5587 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5588 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5595 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5596 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5598 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5601 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5603 /* We vectorize all the stmts of the interleaving group when we
5604 reach the last stmt in the group. */
5605 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5606 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5615 grouped_store
= false;
5616 /* VEC_NUM is the number of vect stmts to be created for this
5618 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5619 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5620 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5621 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5622 op
= gimple_assign_rhs1 (first_stmt
);
5625 /* VEC_NUM is the number of vect stmts to be created for this
5627 vec_num
= group_size
;
5629 ref_type
= get_group_alias_ptr_type (first_stmt
);
5635 group_size
= vec_num
= 1;
5636 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5639 if (dump_enabled_p ())
5640 dump_printf_loc (MSG_NOTE
, vect_location
,
5641 "transform store. ncopies = %d\n", ncopies
);
5643 if (STMT_VINFO_STRIDED_P (stmt_info
))
5645 gimple_stmt_iterator incr_gsi
;
5651 gimple_seq stmts
= NULL
;
5652 tree stride_base
, stride_step
, alias_off
;
5656 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5659 = fold_build_pointer_plus
5660 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5661 size_binop (PLUS_EXPR
,
5662 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5663 convert_to_ptrofftype (DR_INIT (first_dr
))));
5664 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5666 /* For a store with loop-invariant (but other than power-of-2)
5667 stride (i.e. not a grouped access) like so:
5669 for (i = 0; i < n; i += stride)
5672 we generate a new induction variable and new stores from
5673 the components of the (vectorized) rhs:
5675 for (j = 0; ; j += VF*stride)
5680 array[j + stride] = tmp2;
5684 unsigned nstores
= nunits
;
5685 tree ltype
= elem_type
;
5688 nstores
= nunits
/ group_size
;
5689 if (group_size
< nunits
)
5690 ltype
= build_vector_type (elem_type
, group_size
);
5693 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5694 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5698 ivstep
= stride_step
;
5699 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5700 build_int_cst (TREE_TYPE (ivstep
),
5701 ncopies
* nstores
));
5703 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5705 create_iv (stride_base
, ivstep
, NULL
,
5706 loop
, &incr_gsi
, insert_after
,
5708 incr
= gsi_stmt (incr_gsi
);
5709 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5711 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5713 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5715 prev_stmt_info
= NULL
;
5716 alias_off
= build_int_cst (ref_type
, 0);
5717 next_stmt
= first_stmt
;
5718 for (g
= 0; g
< group_size
; g
++)
5720 running_off
= offvar
;
5723 tree size
= TYPE_SIZE_UNIT (ltype
);
5724 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5726 tree newoff
= copy_ssa_name (running_off
, NULL
);
5727 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5729 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5730 running_off
= newoff
;
5732 for (j
= 0; j
< ncopies
; j
++)
5734 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5735 and first_stmt == stmt. */
5740 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5742 vec_oprnd
= vec_oprnds
[0];
5746 gcc_assert (gimple_assign_single_p (next_stmt
));
5747 op
= gimple_assign_rhs1 (next_stmt
);
5748 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5754 vec_oprnd
= vec_oprnds
[j
];
5757 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5758 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5762 for (i
= 0; i
< nstores
; i
++)
5764 tree newref
, newoff
;
5765 gimple
*incr
, *assign
;
5766 tree size
= TYPE_SIZE (ltype
);
5767 /* Extract the i'th component. */
5768 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5769 bitsize_int (i
), size
);
5770 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5773 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5777 newref
= build2 (MEM_REF
, ltype
,
5778 running_off
, alias_off
);
5780 /* And store it to *running_off. */
5781 assign
= gimple_build_assign (newref
, elem
);
5782 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5784 newoff
= copy_ssa_name (running_off
, NULL
);
5785 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5786 running_off
, stride_step
);
5787 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5789 running_off
= newoff
;
5790 if (g
== group_size
- 1
5793 if (j
== 0 && i
== 0)
5794 STMT_VINFO_VEC_STMT (stmt_info
)
5795 = *vec_stmt
= assign
;
5797 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5798 prev_stmt_info
= vinfo_for_stmt (assign
);
5802 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5807 dr_chain
.create (group_size
);
5808 oprnds
.create (group_size
);
5810 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5811 gcc_assert (alignment_support_scheme
);
5812 /* Targets with store-lane instructions must not require explicit
5814 gcc_assert (!store_lanes_p
5815 || alignment_support_scheme
== dr_aligned
5816 || alignment_support_scheme
== dr_unaligned_supported
);
5819 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5822 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5824 aggr_type
= vectype
;
5826 /* In case the vectorization factor (VF) is bigger than the number
5827 of elements that we can fit in a vectype (nunits), we have to generate
5828 more than one vector stmt - i.e - we need to "unroll" the
5829 vector stmt by a factor VF/nunits. For more details see documentation in
5830 vect_get_vec_def_for_copy_stmt. */
5832 /* In case of interleaving (non-unit grouped access):
5839 We create vectorized stores starting from base address (the access of the
5840 first stmt in the chain (S2 in the above example), when the last store stmt
5841 of the chain (S4) is reached:
5844 VS2: &base + vec_size*1 = vx0
5845 VS3: &base + vec_size*2 = vx1
5846 VS4: &base + vec_size*3 = vx3
5848 Then permutation statements are generated:
5850 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5851 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5854 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5855 (the order of the data-refs in the output of vect_permute_store_chain
5856 corresponds to the order of scalar stmts in the interleaving chain - see
5857 the documentation of vect_permute_store_chain()).
5859 In case of both multiple types and interleaving, above vector stores and
5860 permutation stmts are created for every copy. The result vector stmts are
5861 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5862 STMT_VINFO_RELATED_STMT for the next copies.
5865 prev_stmt_info
= NULL
;
5866 for (j
= 0; j
< ncopies
; j
++)
5873 /* Get vectorized arguments for SLP_NODE. */
5874 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5875 NULL
, slp_node
, -1);
5877 vec_oprnd
= vec_oprnds
[0];
5881 /* For interleaved stores we collect vectorized defs for all the
5882 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5883 used as an input to vect_permute_store_chain(), and OPRNDS as
5884 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5886 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5887 OPRNDS are of size 1. */
5888 next_stmt
= first_stmt
;
5889 for (i
= 0; i
< group_size
; i
++)
5891 /* Since gaps are not supported for interleaved stores,
5892 GROUP_SIZE is the exact number of stmts in the chain.
5893 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5894 there is no interleaving, GROUP_SIZE is 1, and only one
5895 iteration of the loop will be executed. */
5896 gcc_assert (next_stmt
5897 && gimple_assign_single_p (next_stmt
));
5898 op
= gimple_assign_rhs1 (next_stmt
);
5900 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5901 dr_chain
.quick_push (vec_oprnd
);
5902 oprnds
.quick_push (vec_oprnd
);
5903 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5907 /* We should have caught mismatched types earlier. */
5908 gcc_assert (useless_type_conversion_p (vectype
,
5909 TREE_TYPE (vec_oprnd
)));
5910 bool simd_lane_access_p
5911 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5912 if (simd_lane_access_p
5913 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5914 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5915 && integer_zerop (DR_OFFSET (first_dr
))
5916 && integer_zerop (DR_INIT (first_dr
))
5917 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5918 get_alias_set (TREE_TYPE (ref_type
))))
5920 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5921 dataref_offset
= build_int_cst (ref_type
, 0);
5926 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5927 simd_lane_access_p
? loop
: NULL
,
5928 offset
, &dummy
, gsi
, &ptr_incr
,
5929 simd_lane_access_p
, &inv_p
);
5930 gcc_assert (bb_vinfo
|| !inv_p
);
5934 /* For interleaved stores we created vectorized defs for all the
5935 defs stored in OPRNDS in the previous iteration (previous copy).
5936 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5937 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5939 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5940 OPRNDS are of size 1. */
5941 for (i
= 0; i
< group_size
; i
++)
5944 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
5945 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5946 dr_chain
[i
] = vec_oprnd
;
5947 oprnds
[i
] = vec_oprnd
;
5951 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5952 TYPE_SIZE_UNIT (aggr_type
));
5954 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5955 TYPE_SIZE_UNIT (aggr_type
));
5962 /* Combine all the vectors into an array. */
5963 vec_array
= create_vector_array (vectype
, vec_num
);
5964 for (i
= 0; i
< vec_num
; i
++)
5966 vec_oprnd
= dr_chain
[i
];
5967 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5971 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5972 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
5973 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5974 gimple_call_set_lhs (new_stmt
, data_ref
);
5975 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5983 result_chain
.create (group_size
);
5985 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5989 next_stmt
= first_stmt
;
5990 for (i
= 0; i
< vec_num
; i
++)
5992 unsigned align
, misalign
;
5995 /* Bump the vector pointer. */
5996 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6000 vec_oprnd
= vec_oprnds
[i
];
6001 else if (grouped_store
)
6002 /* For grouped stores vectorized defs are interleaved in
6003 vect_permute_store_chain(). */
6004 vec_oprnd
= result_chain
[i
];
6006 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
6010 : build_int_cst (ref_type
, 0));
6011 align
= TYPE_ALIGN_UNIT (vectype
);
6012 if (aligned_access_p (first_dr
))
6014 else if (DR_MISALIGNMENT (first_dr
) == -1)
6016 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6017 align
= TYPE_ALIGN_UNIT (elem_type
);
6019 align
= get_object_alignment (DR_REF (first_dr
))
6022 TREE_TYPE (data_ref
)
6023 = build_aligned_type (TREE_TYPE (data_ref
),
6024 align
* BITS_PER_UNIT
);
6028 TREE_TYPE (data_ref
)
6029 = build_aligned_type (TREE_TYPE (data_ref
),
6030 TYPE_ALIGN (elem_type
));
6031 misalign
= DR_MISALIGNMENT (first_dr
);
6033 if (dataref_offset
== NULL_TREE
6034 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6035 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6039 && dt
!= vect_constant_def
6040 && dt
!= vect_external_def
)
6042 tree perm_mask
= perm_mask_for_reverse (vectype
);
6044 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6046 tree new_temp
= make_ssa_name (perm_dest
);
6048 /* Generate the permute statement. */
6050 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6051 vec_oprnd
, perm_mask
);
6052 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6054 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6055 vec_oprnd
= new_temp
;
6058 /* Arguments are ready. Create the new vector stmt. */
6059 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6060 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6065 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6073 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6075 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6076 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6080 dr_chain
.release ();
6082 result_chain
.release ();
6083 vec_oprnds
.release ();
6088 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6089 VECTOR_CST mask. No checks are made that the target platform supports the
6090 mask, so callers may wish to test can_vec_perm_p separately, or use
6091 vect_gen_perm_mask_checked. */
/* NOTE(review): this chunk appears to be a lossy text extraction -- the
   original file's line numbers are fused into the text and several lines
   (the return type, braces, the declarations of NUNITS and I, and the
   final return statement) are not visible.  The comments below annotate
   only the statements that are visible; verify against the upstream
   tree-vect-stmts.c before relying on them.  */
6094 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6096 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
/* One mask element is built per vector lane.  */
6099 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* The mask element type is the integer type whose mode matches the
   vector's element mode (type_for_mode with unsignedp == 1), and the
   mask vector type is the vector type for that scalar.  */
6101 mask_elt_type
= lang_hooks
.types
.type_for_mode
6102 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
6103 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Stack-allocate the element array and fill it, highest index first,
   with integer constants taken from the byte permutation SEL.  */
6105 mask_elts
= XALLOCAVEC (tree
, nunits
);
6106 for (i
= nunits
- 1; i
>= 0; i
--)
6107 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
/* Build the VECTOR_CST mask; presumably this is the function's return
   value -- the return statement itself is not visible in this
   extraction (original lines 6109+ missing).  */
6108 mask_vec
= build_vector (mask_type
, mask_elts
);
6113 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6114 i.e. that the target supports the pattern _for arbitrary input vectors_. */
/* NOTE(review): lossy extraction -- the return type line and the
   function braces are not visible here.  */
6117 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
/* Hard-fail (checking assert) if the target cannot permute VECTYPE's
   mode with SEL for arbitrary (non-constant) inputs, then delegate the
   actual mask construction to vect_gen_perm_mask_any.  */
6119 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6120 return vect_gen_perm_mask_any (vectype
, sel
);
6123 /* Given a vector variable X and Y, that was generated for the scalar
6124 STMT, generate instructions to permute the vector elements of X and Y
6125 using permutation mask MASK_VEC, insert them at *GSI and return the
6126 permuted vector variable. */
/* NOTE(review): lossy extraction -- the return type, braces, the
   declaration of PERM_STMT and the final `return data_ref;` are not
   visible in this chunk.  */
6129 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6130 gimple_stmt_iterator
*gsi
)
6132 tree vectype
= TREE_TYPE (x
);
6133 tree perm_dest
, data_ref
;
/* Create a destination variable modeled on STMT's lhs and give it a
   fresh SSA name to hold the permuted result.  */
6136 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6137 data_ref
= make_ssa_name (perm_dest
);
6139 /* Generate the permute statement. */
6140 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
/* Emit the VEC_PERM_EXPR at *GSI; DATA_REF (not visible here) is
   presumably returned to the caller.  */
6141 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6146 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6147 inserting them on the loops preheader edge. Returns true if we
6148 were successful in doing so (and thus STMT can be moved then),
6149 otherwise returns false. */
/* NOTE(review): lossy extraction -- the return type, braces,
   declarations of OP/OP2/I/I2, the bodies of the inner bail-out
   branches, and both return statements are missing from this chunk.
   The function is structured as two passes over STMT's SSA uses:
   a first (read-only) pass that checks hoisting is legal, and a
   second pass that actually moves the defining statements.  */
6152 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
/* Pass 1: for every SSA use of STMT, check whether its defining
   statement is inside LOOP and, if so, whether it can be hoisted
   without recursing.  */
6158 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6160 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6161 if (!gimple_nop_p (def_stmt
)
6162 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6164 /* Make sure we don't need to recurse. While we could do
6165 so in simple cases when there are more complex use webs
6166 we don't have an easy way to preserve stmt order to fulfil
6167 dependencies within them. */
/* A PHI definition inside the loop cannot be hoisted at all
   (failure branch not visible in this extraction).  */
6170 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* The hoisted def's own operands must all be defined outside
   LOOP, otherwise hoisting would need to recurse -- bail out
   (failure branch not visible in this extraction).  */
6172 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6174 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6175 if (!gimple_nop_p (def_stmt2
)
6176 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Pass 2: legality established above -- physically move each
   in-loop defining statement onto LOOP's preheader edge.  */
6186 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6188 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6189 if (!gimple_nop_p (def_stmt
)
6190 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* Detach DEF_STMT from its current location (without freeing it)
   and re-insert it on the preheader edge, committing the edge
   insertion immediately.  */
6192 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6193 gsi_remove (&gsi
, false);
6194 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6201 /* vectorizable_load.
6203 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6210 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6211 slp_tree slp_node
, slp_instance slp_node_instance
)
6214 tree vec_dest
= NULL
;
6215 tree data_ref
= NULL
;
6216 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6217 stmt_vec_info prev_stmt_info
;
6218 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6219 struct loop
*loop
= NULL
;
6220 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6221 bool nested_in_vect_loop
= false;
6222 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6226 gimple
*new_stmt
= NULL
;
6228 enum dr_alignment_support alignment_support_scheme
;
6229 tree dataref_ptr
= NULL_TREE
;
6230 tree dataref_offset
= NULL_TREE
;
6231 gimple
*ptr_incr
= NULL
;
6233 int i
, j
, group_size
, group_gap_adj
;
6234 tree msq
= NULL_TREE
, lsq
;
6235 tree offset
= NULL_TREE
;
6236 tree byte_offset
= NULL_TREE
;
6237 tree realignment_token
= NULL_TREE
;
6239 vec
<tree
> dr_chain
= vNULL
;
6240 bool grouped_load
= false;
6241 bool load_lanes_p
= false;
6243 gimple
*first_stmt_for_drptr
= NULL
;
6245 bool negative
= false;
6246 bool compute_in_loop
= false;
6247 struct loop
*at_loop
;
6249 bool slp
= (slp_node
!= NULL
);
6250 bool slp_perm
= false;
6251 enum tree_code code
;
6252 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6255 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
6256 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
6257 int gather_scale
= 1;
6258 enum vect_def_type gather_dt
= vect_unknown_def_type
;
6259 vec_info
*vinfo
= stmt_info
->vinfo
;
6262 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6265 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6269 /* Is vectorizable load? */
6270 if (!is_gimple_assign (stmt
))
6273 scalar_dest
= gimple_assign_lhs (stmt
);
6274 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6277 code
= gimple_assign_rhs_code (stmt
);
6278 if (code
!= ARRAY_REF
6279 && code
!= BIT_FIELD_REF
6280 && code
!= INDIRECT_REF
6281 && code
!= COMPONENT_REF
6282 && code
!= IMAGPART_EXPR
6283 && code
!= REALPART_EXPR
6285 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6288 if (!STMT_VINFO_DATA_REF (stmt_info
))
6291 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6292 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6296 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6297 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6298 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6303 /* Multiple types in SLP are handled by creating the appropriate number of
6304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6306 if (slp
|| PURE_SLP_STMT (stmt_info
))
6309 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6311 gcc_assert (ncopies
>= 1);
6313 /* FORNOW. This restriction should be relaxed. */
6314 if (nested_in_vect_loop
&& ncopies
> 1)
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6318 "multiple types in nested loop.\n");
6322 /* Invalidate assumptions made by dependence analysis when vectorization
6323 on the unrolled body effectively re-orders stmts. */
6325 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6326 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6327 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6331 "cannot perform implicit CSE when unrolling "
6332 "with negative dependence distance\n");
6336 elem_type
= TREE_TYPE (vectype
);
6337 mode
= TYPE_MODE (vectype
);
6339 /* FORNOW. In some cases can vectorize even if data-type not supported
6340 (e.g. - data copies). */
6341 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6343 if (dump_enabled_p ())
6344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6345 "Aligned load, but unsupported type.\n");
6349 /* Check if the load is a part of an interleaving chain. */
6350 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6352 grouped_load
= true;
6354 gcc_assert (!nested_in_vect_loop
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6356 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6358 /* If this is single-element interleaving with an element distance
6359 that leaves unused vector loads around punt - we at least create
6360 very sub-optimal code in that case (and blow up memory,
6362 bool force_peeling
= false;
6363 if (first_stmt
== stmt
6364 && !GROUP_NEXT_ELEMENT (stmt_info
))
6366 if (GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
6368 if (dump_enabled_p ())
6369 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6370 "single-element interleaving not supported "
6371 "for not adjacent vector loads\n");
6375 /* Single-element interleaving requires peeling for gaps. */
6376 force_peeling
= true;
6379 /* If there is a gap in the end of the group or the group size cannot
6380 be made a multiple of the vector element count then we access excess
6381 elements in the last iteration and thus need to peel that off. */
6383 && ! STMT_VINFO_STRIDED_P (stmt_info
)
6385 || GROUP_GAP (vinfo_for_stmt (first_stmt
)) != 0
6386 || (!slp
&& vf
% GROUP_SIZE (vinfo_for_stmt (first_stmt
)) != 0)))
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6390 "Data access with gaps requires scalar "
6394 if (dump_enabled_p ())
6395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6396 "Peeling for outer loop is not supported\n");
6400 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
6403 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6406 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6408 /* ??? The following is overly pessimistic (as well as the loop
6409 case above) in the case we can statically determine the excess
6410 elements loaded are within the bounds of a decl that is accessed.
6411 Likewise for BB vectorizations using masked loads is a possibility. */
6412 if (bb_vinfo
&& slp_perm
&& group_size
% nunits
!= 0)
6414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6415 "BB vectorization with gaps at the end of a load "
6416 "is not supported\n");
6421 && !PURE_SLP_STMT (stmt_info
)
6422 && !STMT_VINFO_STRIDED_P (stmt_info
))
6424 if (vect_load_lanes_supported (vectype
, group_size
))
6425 load_lanes_p
= true;
6426 else if (!vect_grouped_load_supported (vectype
, group_size
))
6430 /* Invalidate assumptions made by dependence analysis when vectorization
6431 on the unrolled body effectively re-orders stmts. */
6432 if (!PURE_SLP_STMT (stmt_info
)
6433 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6434 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6435 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6437 if (dump_enabled_p ())
6438 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6439 "cannot perform implicit CSE when performing "
6440 "group loads with negative dependence distance\n");
6444 /* Similarly when the stmt is a load that is both part of a SLP
6445 instance and a loop vectorized stmt via the same-dr mechanism
6446 we have to give up. */
6447 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6448 && (STMT_SLP_TYPE (stmt_info
)
6449 != STMT_SLP_TYPE (vinfo_for_stmt
6450 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6452 if (dump_enabled_p ())
6453 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6454 "conflicting SLP types for CSEd load\n");
6460 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6463 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
6464 &gather_off
, &gather_scale
);
6465 gcc_assert (gather_decl
);
6466 if (!vect_is_simple_use (gather_off
, vinfo
, &def_stmt
, &gather_dt
,
6467 &gather_off_vectype
))
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6471 "gather index use not simple.\n");
6475 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6478 && (slp
|| PURE_SLP_STMT (stmt_info
)))
6479 && (group_size
> nunits
6480 || nunits
% group_size
!= 0))
6482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6483 "unhandled strided group load\n");
6489 negative
= tree_int_cst_compare (nested_in_vect_loop
6490 ? STMT_VINFO_DR_STEP (stmt_info
)
6492 size_zero_node
) < 0;
6493 if (negative
&& ncopies
> 1)
6495 if (dump_enabled_p ())
6496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6497 "multiple types with negative step.\n");
6505 if (dump_enabled_p ())
6506 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6507 "negative step for group load not supported"
6511 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6512 if (alignment_support_scheme
!= dr_aligned
6513 && alignment_support_scheme
!= dr_unaligned_supported
)
6515 if (dump_enabled_p ())
6516 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6517 "negative step but alignment required.\n");
6520 if (!perm_mask_for_reverse (vectype
))
6522 if (dump_enabled_p ())
6523 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6524 "negative step and reversing not supported."
6531 if (!vec_stmt
) /* transformation not required. */
6533 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6534 /* The SLP costs are calculated during SLP analysis. */
6535 if (!PURE_SLP_STMT (stmt_info
))
6536 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6541 if (dump_enabled_p ())
6542 dump_printf_loc (MSG_NOTE
, vect_location
,
6543 "transform load. ncopies = %d\n", ncopies
);
6547 ensure_base_align (stmt_info
, dr
);
6549 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6551 tree vec_oprnd0
= NULL_TREE
, op
;
6552 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6553 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6554 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6555 edge pe
= loop_preheader_edge (loop
);
6558 enum { NARROW
, NONE
, WIDEN
} modifier
;
6559 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6561 if (nunits
== gather_off_nunits
)
6563 else if (nunits
== gather_off_nunits
/ 2)
6565 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6568 for (i
= 0; i
< gather_off_nunits
; ++i
)
6569 sel
[i
] = i
| nunits
;
6571 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6573 else if (nunits
== gather_off_nunits
* 2)
6575 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6578 for (i
= 0; i
< nunits
; ++i
)
6579 sel
[i
] = i
< gather_off_nunits
6580 ? i
: i
+ nunits
- gather_off_nunits
;
6582 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6588 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6589 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6590 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6591 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6592 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6593 scaletype
= TREE_VALUE (arglist
);
6594 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6596 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6598 ptr
= fold_convert (ptrtype
, gather_base
);
6599 if (!is_gimple_min_invariant (ptr
))
6601 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6602 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6603 gcc_assert (!new_bb
);
6606 /* Currently we support only unconditional gather loads,
6607 so mask should be all ones. */
6608 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6609 mask
= build_int_cst (masktype
, -1);
6610 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6612 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6613 mask
= build_vector_from_val (masktype
, mask
);
6614 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6616 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6620 for (j
= 0; j
< 6; ++j
)
6622 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6623 mask
= build_real (TREE_TYPE (masktype
), r
);
6624 mask
= build_vector_from_val (masktype
, mask
);
6625 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6630 scale
= build_int_cst (scaletype
, gather_scale
);
6632 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6633 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6638 for (j
= 0; j
< 6; ++j
)
6640 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6641 merge
= build_real (TREE_TYPE (rettype
), r
);
6645 merge
= build_vector_from_val (rettype
, merge
);
6646 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6648 prev_stmt_info
= NULL
;
6649 for (j
= 0; j
< ncopies
; ++j
)
6651 if (modifier
== WIDEN
&& (j
& 1))
6652 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6653 perm_mask
, stmt
, gsi
);
6656 = vect_get_vec_def_for_operand (gather_off
, stmt
);
6659 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6661 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6663 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6664 == TYPE_VECTOR_SUBPARTS (idxtype
));
6665 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6666 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6668 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6669 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6674 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6676 if (!useless_type_conversion_p (vectype
, rettype
))
6678 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6679 == TYPE_VECTOR_SUBPARTS (rettype
));
6680 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6681 gimple_call_set_lhs (new_stmt
, op
);
6682 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6683 var
= make_ssa_name (vec_dest
);
6684 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6686 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6690 var
= make_ssa_name (vec_dest
, new_stmt
);
6691 gimple_call_set_lhs (new_stmt
, var
);
6694 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6696 if (modifier
== NARROW
)
6703 var
= permute_vec_elements (prev_res
, var
,
6704 perm_mask
, stmt
, gsi
);
6705 new_stmt
= SSA_NAME_DEF_STMT (var
);
6708 if (prev_stmt_info
== NULL
)
6709 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6711 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6712 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6716 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6718 gimple_stmt_iterator incr_gsi
;
6724 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6725 gimple_seq stmts
= NULL
;
6726 tree stride_base
, stride_step
, alias_off
;
6728 gcc_assert (!nested_in_vect_loop
);
6730 if (slp
&& grouped_load
)
6732 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6733 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6734 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6735 ref_type
= get_group_alias_ptr_type (first_stmt
);
6742 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6746 = fold_build_pointer_plus
6747 (DR_BASE_ADDRESS (first_dr
),
6748 size_binop (PLUS_EXPR
,
6749 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6750 convert_to_ptrofftype (DR_INIT (first_dr
))));
6751 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6753 /* For a load with loop-invariant (but other than power-of-2)
6754 stride (i.e. not a grouped access) like so:
6756 for (i = 0; i < n; i += stride)
6759 we generate a new induction variable and new accesses to
6760 form a new vector (or vectors, depending on ncopies):
6762 for (j = 0; ; j += VF*stride)
6764 tmp2 = array[j + stride];
6766 vectemp = {tmp1, tmp2, ...}
6769 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6770 build_int_cst (TREE_TYPE (stride_step
), vf
));
6772 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6774 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6775 loop
, &incr_gsi
, insert_after
,
6777 incr
= gsi_stmt (incr_gsi
);
6778 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6780 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6781 &stmts
, true, NULL_TREE
);
6783 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6785 prev_stmt_info
= NULL
;
6786 running_off
= offvar
;
6787 alias_off
= build_int_cst (ref_type
, 0);
6788 int nloads
= nunits
;
6789 tree ltype
= TREE_TYPE (vectype
);
6790 auto_vec
<tree
> dr_chain
;
6793 nloads
= nunits
/ group_size
;
6794 if (group_size
< nunits
)
6795 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6798 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6799 /* For SLP permutation support we need to load the whole group,
6800 not only the number of vector stmts the permutation result
6804 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
6805 dr_chain
.create (ncopies
);
6808 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6810 for (j
= 0; j
< ncopies
; j
++)
6816 vec_alloc (v
, nloads
);
6817 for (i
= 0; i
< nloads
; i
++)
6819 tree newref
, newoff
;
6821 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6823 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6826 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6827 newoff
= copy_ssa_name (running_off
);
6828 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6829 running_off
, stride_step
);
6830 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6832 running_off
= newoff
;
6835 vec_inv
= build_constructor (vectype
, v
);
6836 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6837 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6841 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6842 build2 (MEM_REF
, ltype
,
6843 running_off
, alias_off
));
6844 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6846 tree newoff
= copy_ssa_name (running_off
);
6847 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6848 running_off
, stride_step
);
6849 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6851 running_off
= newoff
;
6857 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6859 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6864 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6866 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6867 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6871 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6872 slp_node_instance
, false);
6878 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6879 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6880 /* For SLP vectorization we directly vectorize a subchain
6881 without permutation. */
6882 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6883 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6884 /* For BB vectorization always use the first stmt to base
6885 the data ref pointer on. */
6887 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6889 /* Check if the chain of loads is already vectorized. */
6890 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6891 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6892 ??? But we can only do so if there is exactly one
6893 as we have no way to get at the rest. Leave the CSE
6895 ??? With the group load eventually participating
6896 in multiple different permutations (having multiple
6897 slp nodes which refer to the same group) the CSE
6898 is even wrong code. See PR56270. */
6901 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6904 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6907 /* VEC_NUM is the number of vect stmts to be created for this group. */
6910 grouped_load
= false;
6911 /* For SLP permutation support we need to load the whole group,
6912 not only the number of vector stmts the permutation result
6915 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6917 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6918 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6921 vec_num
= group_size
;
6923 ref_type
= get_group_alias_ptr_type (first_stmt
);
6929 group_size
= vec_num
= 1;
6931 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6934 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6935 gcc_assert (alignment_support_scheme
);
6936 /* Targets with load-lane instructions must not require explicit
6938 gcc_assert (!load_lanes_p
6939 || alignment_support_scheme
== dr_aligned
6940 || alignment_support_scheme
== dr_unaligned_supported
);
6942 /* In case the vectorization factor (VF) is bigger than the number
6943 of elements that we can fit in a vectype (nunits), we have to generate
6944 more than one vector stmt - i.e - we need to "unroll" the
6945 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6946 from one copy of the vector stmt to the next, in the field
6947 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6948 stages to find the correct vector defs to be used when vectorizing
6949 stmts that use the defs of the current stmt. The example below
6950 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6951 need to create 4 vectorized stmts):
6953 before vectorization:
6954 RELATED_STMT VEC_STMT
6958 step 1: vectorize stmt S1:
6959 We first create the vector stmt VS1_0, and, as usual, record a
6960 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6961 Next, we create the vector stmt VS1_1, and record a pointer to
6962 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6963 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6965 RELATED_STMT VEC_STMT
6966 VS1_0: vx0 = memref0 VS1_1 -
6967 VS1_1: vx1 = memref1 VS1_2 -
6968 VS1_2: vx2 = memref2 VS1_3 -
6969 VS1_3: vx3 = memref3 - -
6970 S1: x = load - VS1_0
6973 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6974 information we recorded in RELATED_STMT field is used to vectorize
6977 /* In case of interleaving (non-unit grouped access):
6984 Vectorized loads are created in the order of memory accesses
6985 starting from the access of the first stmt of the chain:
6988 VS2: vx1 = &base + vec_size*1
6989 VS3: vx3 = &base + vec_size*2
6990 VS4: vx4 = &base + vec_size*3
6992 Then permutation statements are generated:
6994 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6995 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6998 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6999 (the order of the data-refs in the output of vect_permute_load_chain
7000 corresponds to the order of scalar stmts in the interleaving chain - see
7001 the documentation of vect_permute_load_chain()).
7002 The generation of permutation stmts and recording them in
7003 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7005 In case of both multiple types and interleaving, the vector loads and
7006 permutation stmts above are created for every copy. The result vector
7007 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7008 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7010 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7011 on a target that supports unaligned accesses (dr_unaligned_supported)
7012 we generate the following code:
7016 p = p + indx * vectype_size;
7021 Otherwise, the data reference is potentially unaligned on a target that
7022 does not support unaligned accesses (dr_explicit_realign_optimized) -
7023 then generate the following code, in which the data in each iteration is
7024 obtained by two vector loads, one from the previous iteration, and one
7025 from the current iteration:
7027 msq_init = *(floor(p1))
7028 p2 = initial_addr + VS - 1;
7029 realignment_token = call target_builtin;
7032 p2 = p2 + indx * vectype_size
7034 vec_dest = realign_load (msq, lsq, realignment_token)
7039 /* If the misalignment remains the same throughout the execution of the
7040 loop, we can create the init_addr and permutation mask at the loop
7041 preheader. Otherwise, it needs to be created inside the loop.
7042 This can only occur when vectorizing memory accesses in the inner-loop
7043 nested within an outer-loop that is being vectorized. */
7045 if (nested_in_vect_loop
7046 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7047 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7049 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7050 compute_in_loop
= true;
7053 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7054 || alignment_support_scheme
== dr_explicit_realign
)
7055 && !compute_in_loop
)
7057 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7058 alignment_support_scheme
, NULL_TREE
,
7060 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7062 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7063 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7071 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7074 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7076 aggr_type
= vectype
;
7078 prev_stmt_info
= NULL
;
7079 for (j
= 0; j
< ncopies
; j
++)
7081 /* 1. Create the vector or array pointer update chain. */
7084 bool simd_lane_access_p
7085 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7086 if (simd_lane_access_p
7087 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7088 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7089 && integer_zerop (DR_OFFSET (first_dr
))
7090 && integer_zerop (DR_INIT (first_dr
))
7091 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7092 get_alias_set (TREE_TYPE (ref_type
)))
7093 && (alignment_support_scheme
== dr_aligned
7094 || alignment_support_scheme
== dr_unaligned_supported
))
7096 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7097 dataref_offset
= build_int_cst (ref_type
, 0);
7100 else if (first_stmt_for_drptr
7101 && first_stmt
!= first_stmt_for_drptr
)
7104 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7105 at_loop
, offset
, &dummy
, gsi
,
7106 &ptr_incr
, simd_lane_access_p
,
7107 &inv_p
, byte_offset
);
7108 /* Adjust the pointer by the difference to first_stmt. */
7109 data_reference_p ptrdr
7110 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7111 tree diff
= fold_convert (sizetype
,
7112 size_binop (MINUS_EXPR
,
7115 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7120 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7121 offset
, &dummy
, gsi
, &ptr_incr
,
7122 simd_lane_access_p
, &inv_p
,
7125 else if (dataref_offset
)
7126 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7127 TYPE_SIZE_UNIT (aggr_type
));
7129 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7130 TYPE_SIZE_UNIT (aggr_type
));
7132 if (grouped_load
|| slp_perm
)
7133 dr_chain
.create (vec_num
);
7139 vec_array
= create_vector_array (vectype
, vec_num
);
7142 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7143 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7144 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7145 gimple_call_set_lhs (new_stmt
, vec_array
);
7146 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7148 /* Extract each vector into an SSA_NAME. */
7149 for (i
= 0; i
< vec_num
; i
++)
7151 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7153 dr_chain
.quick_push (new_temp
);
7156 /* Record the mapping between SSA_NAMEs and statements. */
7157 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7161 for (i
= 0; i
< vec_num
; i
++)
7164 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7167 /* 2. Create the vector-load in the loop. */
7168 switch (alignment_support_scheme
)
7171 case dr_unaligned_supported
:
7173 unsigned int align
, misalign
;
7176 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7179 : build_int_cst (ref_type
, 0));
7180 align
= TYPE_ALIGN_UNIT (vectype
);
7181 if (alignment_support_scheme
== dr_aligned
)
7183 gcc_assert (aligned_access_p (first_dr
));
7186 else if (DR_MISALIGNMENT (first_dr
) == -1)
7188 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7189 align
= TYPE_ALIGN_UNIT (elem_type
);
7191 align
= (get_object_alignment (DR_REF (first_dr
))
7194 TREE_TYPE (data_ref
)
7195 = build_aligned_type (TREE_TYPE (data_ref
),
7196 align
* BITS_PER_UNIT
);
7200 TREE_TYPE (data_ref
)
7201 = build_aligned_type (TREE_TYPE (data_ref
),
7202 TYPE_ALIGN (elem_type
));
7203 misalign
= DR_MISALIGNMENT (first_dr
);
7205 if (dataref_offset
== NULL_TREE
7206 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7207 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7211 case dr_explicit_realign
:
7215 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7217 if (compute_in_loop
)
7218 msq
= vect_setup_realignment (first_stmt
, gsi
,
7220 dr_explicit_realign
,
7223 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7224 ptr
= copy_ssa_name (dataref_ptr
);
7226 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7227 new_stmt
= gimple_build_assign
7228 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7230 (TREE_TYPE (dataref_ptr
),
7231 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7232 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7234 = build2 (MEM_REF
, vectype
, ptr
,
7235 build_int_cst (ref_type
, 0));
7236 vec_dest
= vect_create_destination_var (scalar_dest
,
7238 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7239 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7240 gimple_assign_set_lhs (new_stmt
, new_temp
);
7241 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7242 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7243 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7246 bump
= size_binop (MULT_EXPR
, vs
,
7247 TYPE_SIZE_UNIT (elem_type
));
7248 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7249 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7250 new_stmt
= gimple_build_assign
7251 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7254 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7255 ptr
= copy_ssa_name (ptr
, new_stmt
);
7256 gimple_assign_set_lhs (new_stmt
, ptr
);
7257 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7259 = build2 (MEM_REF
, vectype
, ptr
,
7260 build_int_cst (ref_type
, 0));
7263 case dr_explicit_realign_optimized
:
7264 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7265 new_temp
= copy_ssa_name (dataref_ptr
);
7267 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7268 new_stmt
= gimple_build_assign
7269 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7271 (TREE_TYPE (dataref_ptr
),
7272 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7273 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7275 = build2 (MEM_REF
, vectype
, new_temp
,
7276 build_int_cst (ref_type
, 0));
7281 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7282 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7283 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7284 gimple_assign_set_lhs (new_stmt
, new_temp
);
7285 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7287 /* 3. Handle explicit realignment if necessary/supported.
7289 vec_dest = realign_load (msq, lsq, realignment_token) */
7290 if (alignment_support_scheme
== dr_explicit_realign_optimized
7291 || alignment_support_scheme
== dr_explicit_realign
)
7293 lsq
= gimple_assign_lhs (new_stmt
);
7294 if (!realignment_token
)
7295 realignment_token
= dataref_ptr
;
7296 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7297 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7298 msq
, lsq
, realignment_token
);
7299 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7300 gimple_assign_set_lhs (new_stmt
, new_temp
);
7301 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7303 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7306 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7307 add_phi_arg (phi
, lsq
,
7308 loop_latch_edge (containing_loop
),
7314 /* 4. Handle invariant-load. */
7315 if (inv_p
&& !bb_vinfo
)
7317 gcc_assert (!grouped_load
);
7318 /* If we have versioned for aliasing or the loop doesn't
7319 have any data dependencies that would preclude this,
7320 then we are sure this is a loop invariant load and
7321 thus we can insert it on the preheader edge. */
7322 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7323 && !nested_in_vect_loop
7324 && hoist_defs_of_uses (stmt
, loop
))
7326 if (dump_enabled_p ())
7328 dump_printf_loc (MSG_NOTE
, vect_location
,
7329 "hoisting out of the vectorized "
7331 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7333 tree tem
= copy_ssa_name (scalar_dest
);
7334 gsi_insert_on_edge_immediate
7335 (loop_preheader_edge (loop
),
7336 gimple_build_assign (tem
,
7338 (gimple_assign_rhs1 (stmt
))));
7339 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7340 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7341 set_vinfo_for_stmt (new_stmt
,
7342 new_stmt_vec_info (new_stmt
, vinfo
));
7346 gimple_stmt_iterator gsi2
= *gsi
;
7348 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7350 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7356 tree perm_mask
= perm_mask_for_reverse (vectype
);
7357 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7358 perm_mask
, stmt
, gsi
);
7359 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7362 /* Collect vector loads and later create their permutation in
7363 vect_transform_grouped_load (). */
7364 if (grouped_load
|| slp_perm
)
7365 dr_chain
.quick_push (new_temp
);
7367 /* Store vector loads in the corresponding SLP_NODE. */
7368 if (slp
&& !slp_perm
)
7369 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7371 /* Bump the vector pointer to account for a gap or for excess
7372 elements loaded for a permuted SLP load. */
7373 if (group_gap_adj
!= 0)
7377 = wide_int_to_tree (sizetype
,
7378 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7379 group_gap_adj
, &ovf
));
7380 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7385 if (slp
&& !slp_perm
)
7390 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7391 slp_node_instance
, false))
7393 dr_chain
.release ();
7402 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7403 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7408 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7410 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7411 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7414 dr_chain
.release ();
7420 /* Function vect_is_simple_cond.
7423 LOOP - the loop that is being vectorized.
7424 COND - Condition that is checked for simple use.
7427 *COMP_VECTYPE - the vector type for the comparison.
7429 Returns whether a COND can be vectorized. Checks whether
7430 condition operands are supportable using vec_is_simple_use. */
7433 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7436 enum vect_def_type dt
;
7437 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7440 if (TREE_CODE (cond
) == SSA_NAME
7441 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7443 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7444 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7447 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7452 if (!COMPARISON_CLASS_P (cond
))
7455 lhs
= TREE_OPERAND (cond
, 0);
7456 rhs
= TREE_OPERAND (cond
, 1);
7458 if (TREE_CODE (lhs
) == SSA_NAME
)
7460 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7461 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7464 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7465 && TREE_CODE (lhs
) != FIXED_CST
)
7468 if (TREE_CODE (rhs
) == SSA_NAME
)
7470 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7471 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7474 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7475 && TREE_CODE (rhs
) != FIXED_CST
)
7478 if (vectype1
&& vectype2
7479 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7482 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7486 /* vectorizable_condition.
7488 Check if STMT is conditional modify expression that can be vectorized.
7489 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7490 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7493 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7494 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7495 else clause if it is 2).
7497 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7500 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7501 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7504 tree scalar_dest
= NULL_TREE
;
7505 tree vec_dest
= NULL_TREE
;
7506 tree cond_expr
, then_clause
, else_clause
;
7507 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7508 tree comp_vectype
= NULL_TREE
;
7509 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7510 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7513 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7514 enum vect_def_type dt
, dts
[4];
7516 enum tree_code code
;
7517 stmt_vec_info prev_stmt_info
= NULL
;
7519 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7520 vec
<tree
> vec_oprnds0
= vNULL
;
7521 vec
<tree
> vec_oprnds1
= vNULL
;
7522 vec
<tree
> vec_oprnds2
= vNULL
;
7523 vec
<tree
> vec_oprnds3
= vNULL
;
7525 bool masked
= false;
7527 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7530 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7532 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7535 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7536 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7540 /* FORNOW: not yet supported. */
7541 if (STMT_VINFO_LIVE_P (stmt_info
))
7543 if (dump_enabled_p ())
7544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7545 "value used after loop.\n");
7550 /* Is vectorizable conditional operation? */
7551 if (!is_gimple_assign (stmt
))
7554 code
= gimple_assign_rhs_code (stmt
);
7556 if (code
!= COND_EXPR
)
7559 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7560 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7561 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7563 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
7566 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7568 gcc_assert (ncopies
>= 1);
7569 if (reduc_index
&& ncopies
> 1)
7570 return false; /* FORNOW */
7572 cond_expr
= gimple_assign_rhs1 (stmt
);
7573 then_clause
= gimple_assign_rhs2 (stmt
);
7574 else_clause
= gimple_assign_rhs3 (stmt
);
7576 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7581 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7584 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7588 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7591 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7594 masked
= !COMPARISON_CLASS_P (cond_expr
);
7595 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7597 if (vec_cmp_type
== NULL_TREE
)
7602 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7603 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7610 vec_oprnds0
.create (1);
7611 vec_oprnds1
.create (1);
7612 vec_oprnds2
.create (1);
7613 vec_oprnds3
.create (1);
7617 scalar_dest
= gimple_assign_lhs (stmt
);
7618 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7620 /* Handle cond expr. */
7621 for (j
= 0; j
< ncopies
; j
++)
7623 gassign
*new_stmt
= NULL
;
7628 auto_vec
<tree
, 4> ops
;
7629 auto_vec
<vec
<tree
>, 4> vec_defs
;
7632 ops
.safe_push (cond_expr
);
7635 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7636 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7638 ops
.safe_push (then_clause
);
7639 ops
.safe_push (else_clause
);
7640 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7641 vec_oprnds3
= vec_defs
.pop ();
7642 vec_oprnds2
= vec_defs
.pop ();
7644 vec_oprnds1
= vec_defs
.pop ();
7645 vec_oprnds0
= vec_defs
.pop ();
7648 vec_defs
.release ();
7656 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7658 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7664 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7665 stmt
, comp_vectype
);
7666 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7667 loop_vinfo
, >emp
, &dts
[0]);
7670 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7671 stmt
, comp_vectype
);
7672 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7673 loop_vinfo
, >emp
, &dts
[1]);
7675 if (reduc_index
== 1)
7676 vec_then_clause
= reduc_def
;
7679 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7681 vect_is_simple_use (then_clause
, loop_vinfo
,
7684 if (reduc_index
== 2)
7685 vec_else_clause
= reduc_def
;
7688 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7690 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7697 = vect_get_vec_def_for_stmt_copy (dts
[0],
7698 vec_oprnds0
.pop ());
7701 = vect_get_vec_def_for_stmt_copy (dts
[1],
7702 vec_oprnds1
.pop ());
7704 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7705 vec_oprnds2
.pop ());
7706 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7707 vec_oprnds3
.pop ());
7712 vec_oprnds0
.quick_push (vec_cond_lhs
);
7714 vec_oprnds1
.quick_push (vec_cond_rhs
);
7715 vec_oprnds2
.quick_push (vec_then_clause
);
7716 vec_oprnds3
.quick_push (vec_else_clause
);
7719 /* Arguments are ready. Create the new vector stmt. */
7720 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7722 vec_then_clause
= vec_oprnds2
[i
];
7723 vec_else_clause
= vec_oprnds3
[i
];
7726 vec_compare
= vec_cond_lhs
;
7729 vec_cond_rhs
= vec_oprnds1
[i
];
7730 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7731 vec_cond_lhs
, vec_cond_rhs
);
7733 new_temp
= make_ssa_name (vec_dest
);
7734 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
7735 vec_compare
, vec_then_clause
,
7737 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7739 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7746 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7748 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7750 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7753 vec_oprnds0
.release ();
7754 vec_oprnds1
.release ();
7755 vec_oprnds2
.release ();
7756 vec_oprnds3
.release ();
7761 /* vectorizable_comparison.
7763 Check if STMT is comparison expression that can be vectorized.
7764 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7765 comparison, put it in VEC_STMT, and insert it at GSI.
7767 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7770 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7771 gimple
**vec_stmt
, tree reduc_def
,
7774 tree lhs
, rhs1
, rhs2
;
7775 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7776 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7777 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7778 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7780 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7781 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7784 enum tree_code code
;
7785 stmt_vec_info prev_stmt_info
= NULL
;
7787 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7788 vec
<tree
> vec_oprnds0
= vNULL
;
7789 vec
<tree
> vec_oprnds1
= vNULL
;
7794 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7797 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
7800 mask_type
= vectype
;
7801 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7803 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
7806 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7808 gcc_assert (ncopies
>= 1);
7809 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7810 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7814 if (STMT_VINFO_LIVE_P (stmt_info
))
7816 if (dump_enabled_p ())
7817 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7818 "value used after loop.\n");
7822 if (!is_gimple_assign (stmt
))
7825 code
= gimple_assign_rhs_code (stmt
);
7827 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7830 rhs1
= gimple_assign_rhs1 (stmt
);
7831 rhs2
= gimple_assign_rhs2 (stmt
);
7833 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7834 &dts
[0], &vectype1
))
7837 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7838 &dts
[1], &vectype2
))
7841 if (vectype1
&& vectype2
7842 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7845 vectype
= vectype1
? vectype1
: vectype2
;
7847 /* Invariant comparison. */
7850 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
7851 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
7854 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
7859 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7860 vect_model_simple_cost (stmt_info
, ncopies
, dts
, NULL
, NULL
);
7861 return expand_vec_cmp_expr_p (vectype
, mask_type
);
7867 vec_oprnds0
.create (1);
7868 vec_oprnds1
.create (1);
7872 lhs
= gimple_assign_lhs (stmt
);
7873 mask
= vect_create_destination_var (lhs
, mask_type
);
7875 /* Handle cmp expr. */
7876 for (j
= 0; j
< ncopies
; j
++)
7878 gassign
*new_stmt
= NULL
;
7883 auto_vec
<tree
, 2> ops
;
7884 auto_vec
<vec
<tree
>, 2> vec_defs
;
7886 ops
.safe_push (rhs1
);
7887 ops
.safe_push (rhs2
);
7888 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7889 vec_oprnds1
= vec_defs
.pop ();
7890 vec_oprnds0
= vec_defs
.pop ();
7894 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
7895 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
7900 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
7901 vec_oprnds0
.pop ());
7902 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
7903 vec_oprnds1
.pop ());
7908 vec_oprnds0
.quick_push (vec_rhs1
);
7909 vec_oprnds1
.quick_push (vec_rhs2
);
7912 /* Arguments are ready. Create the new vector stmt. */
7913 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
7915 vec_rhs2
= vec_oprnds1
[i
];
7917 new_temp
= make_ssa_name (mask
);
7918 new_stmt
= gimple_build_assign (new_temp
, code
, vec_rhs1
, vec_rhs2
);
7919 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7921 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7928 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7930 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7932 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7935 vec_oprnds0
.release ();
7936 vec_oprnds1
.release ();
7941 /* Make sure the statement is vectorizable. */
7944 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
7946 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7947 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7948 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7950 tree scalar_type
, vectype
;
7951 gimple
*pattern_stmt
;
7952 gimple_seq pattern_def_seq
;
7954 if (dump_enabled_p ())
7956 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7957 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7960 if (gimple_has_volatile_ops (stmt
))
7962 if (dump_enabled_p ())
7963 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7964 "not vectorized: stmt has volatile operands\n");
7969 /* Skip stmts that do not need to be vectorized. In loops this is expected
7971 - the COND_EXPR which is the loop exit condition
7972 - any LABEL_EXPRs in the loop
7973 - computations that are used only for array indexing or loop control.
7974 In basic blocks we only analyze statements that are a part of some SLP
7975 instance, therefore, all the statements are relevant.
7977 Pattern statement needs to be analyzed instead of the original statement
7978 if the original statement is not relevant. Otherwise, we analyze both
7979 statements. In basic blocks we are called from some SLP instance
7980 traversal, don't analyze pattern stmts instead, the pattern stmts
7981 already will be part of SLP instance. */
7983 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7984 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7985 && !STMT_VINFO_LIVE_P (stmt_info
))
7987 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7989 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7990 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7992 /* Analyze PATTERN_STMT instead of the original stmt. */
7993 stmt
= pattern_stmt
;
7994 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7995 if (dump_enabled_p ())
7997 dump_printf_loc (MSG_NOTE
, vect_location
,
7998 "==> examining pattern statement: ");
7999 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8004 if (dump_enabled_p ())
8005 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8010 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8013 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8014 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8016 /* Analyze PATTERN_STMT too. */
8017 if (dump_enabled_p ())
8019 dump_printf_loc (MSG_NOTE
, vect_location
,
8020 "==> examining pattern statement: ");
8021 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8024 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
8028 if (is_pattern_stmt_p (stmt_info
)
8030 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8032 gimple_stmt_iterator si
;
8034 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8036 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8037 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8038 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8040 /* Analyze def stmt of STMT if it's a pattern stmt. */
8041 if (dump_enabled_p ())
8043 dump_printf_loc (MSG_NOTE
, vect_location
,
8044 "==> examining pattern def statement: ");
8045 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8048 if (!vect_analyze_stmt (pattern_def_stmt
,
8049 need_to_vectorize
, node
))
8055 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8057 case vect_internal_def
:
8060 case vect_reduction_def
:
8061 case vect_nested_cycle
:
8062 gcc_assert (!bb_vinfo
8063 && (relevance
== vect_used_in_outer
8064 || relevance
== vect_used_in_outer_by_reduction
8065 || relevance
== vect_used_by_reduction
8066 || relevance
== vect_unused_in_scope
));
8069 case vect_induction_def
:
8070 case vect_constant_def
:
8071 case vect_external_def
:
8072 case vect_unknown_def_type
:
8079 gcc_assert (PURE_SLP_STMT (stmt_info
));
8081 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8082 if (dump_enabled_p ())
8084 dump_printf_loc (MSG_NOTE
, vect_location
,
8085 "get vectype for scalar type: ");
8086 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8087 dump_printf (MSG_NOTE
, "\n");
8090 vectype
= get_vectype_for_scalar_type (scalar_type
);
8093 if (dump_enabled_p ())
8095 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8096 "not SLPed: unsupported data-type ");
8097 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8099 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8104 if (dump_enabled_p ())
8106 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8107 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8108 dump_printf (MSG_NOTE
, "\n");
8111 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8114 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8116 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8117 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8118 || (is_gimple_call (stmt
)
8119 && gimple_call_lhs (stmt
) == NULL_TREE
));
8120 *need_to_vectorize
= true;
8123 if (PURE_SLP_STMT (stmt_info
) && !node
)
8125 dump_printf_loc (MSG_NOTE
, vect_location
,
8126 "handled only by SLP analysis\n");
8132 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8133 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8134 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8135 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8136 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8137 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8138 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8139 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8140 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8141 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8142 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8143 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8144 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8148 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8149 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8150 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8151 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8152 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8153 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8154 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8155 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8156 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8157 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8162 if (dump_enabled_p ())
8164 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8165 "not vectorized: relevant stmt not ");
8166 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8167 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8176 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8177 need extra handling, except for vectorizable reductions. */
8178 if (STMT_VINFO_LIVE_P (stmt_info
)
8179 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8180 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
8184 if (dump_enabled_p ())
8186 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8187 "not vectorized: live stmt not ");
8188 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8189 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8199 /* Function vect_transform_stmt.
8201 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8204 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8205 bool *grouped_store
, slp_tree slp_node
,
8206 slp_instance slp_node_instance
)
8208 bool is_store
= false;
8209 gimple
*vec_stmt
= NULL
;
8210 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8213 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8215 switch (STMT_VINFO_TYPE (stmt_info
))
8217 case type_demotion_vec_info_type
:
8218 case type_promotion_vec_info_type
:
8219 case type_conversion_vec_info_type
:
8220 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8224 case induc_vec_info_type
:
8225 gcc_assert (!slp_node
);
8226 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8230 case shift_vec_info_type
:
8231 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8235 case op_vec_info_type
:
8236 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8240 case assignment_vec_info_type
:
8241 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8245 case load_vec_info_type
:
8246 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8251 case store_vec_info_type
:
8252 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8254 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8256 /* In case of interleaving, the whole chain is vectorized when the
8257 last store in the chain is reached. Store stmts before the last
8258 one are skipped, and there vec_stmt_info shouldn't be freed
8260 *grouped_store
= true;
8261 if (STMT_VINFO_VEC_STMT (stmt_info
))
8268 case condition_vec_info_type
:
8269 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8273 case comparison_vec_info_type
:
8274 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8278 case call_vec_info_type
:
8279 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8280 stmt
= gsi_stmt (*gsi
);
8281 if (is_gimple_call (stmt
)
8282 && gimple_call_internal_p (stmt
)
8283 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8287 case call_simd_clone_vec_info_type
:
8288 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8289 stmt
= gsi_stmt (*gsi
);
8292 case reduc_vec_info_type
:
8293 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8298 if (!STMT_VINFO_LIVE_P (stmt_info
))
8300 if (dump_enabled_p ())
8301 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8302 "stmt not supported.\n");
8307 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8308 This would break hybrid SLP vectorization. */
8310 gcc_assert (!vec_stmt
8311 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8313 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8314 is being vectorized, but outside the immediately enclosing loop. */
8316 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8317 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8318 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8319 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8320 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8321 || STMT_VINFO_RELEVANT (stmt_info
) ==
8322 vect_used_in_outer_by_reduction
))
8324 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8325 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8326 imm_use_iterator imm_iter
;
8327 use_operand_p use_p
;
8331 if (dump_enabled_p ())
8332 dump_printf_loc (MSG_NOTE
, vect_location
,
8333 "Record the vdef for outer-loop vectorization.\n");
8335 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8336 (to be used when vectorizing outer-loop stmts that use the DEF of
8338 if (gimple_code (stmt
) == GIMPLE_PHI
)
8339 scalar_dest
= PHI_RESULT (stmt
);
8341 scalar_dest
= gimple_assign_lhs (stmt
);
8343 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8345 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8347 exit_phi
= USE_STMT (use_p
);
8348 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8353 /* Handle stmts whose DEF is used outside the loop-nest that is
8354 being vectorized. */
8355 if (STMT_VINFO_LIVE_P (stmt_info
)
8356 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8358 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
8363 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8369 /* Remove a group of stores (for SLP or interleaving), free their
8373 vect_remove_stores (gimple
*first_stmt
)
8375 gimple
*next
= first_stmt
;
8377 gimple_stmt_iterator next_si
;
8381 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8383 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8384 if (is_pattern_stmt_p (stmt_info
))
8385 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8386 /* Free the attached stmt_vec_info and remove the stmt. */
8387 next_si
= gsi_for_stmt (next
);
8388 unlink_stmt_vdef (next
);
8389 gsi_remove (&next_si
, true);
8390 release_defs (next
);
8391 free_stmt_vec_info (next
);
8397 /* Function new_stmt_vec_info.
8399 Create and initialize a new stmt_vec_info struct for STMT. */
8402 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8405 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8407 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8408 STMT_VINFO_STMT (res
) = stmt
;
8410 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8411 STMT_VINFO_LIVE_P (res
) = false;
8412 STMT_VINFO_VECTYPE (res
) = NULL
;
8413 STMT_VINFO_VEC_STMT (res
) = NULL
;
8414 STMT_VINFO_VECTORIZABLE (res
) = true;
8415 STMT_VINFO_IN_PATTERN_P (res
) = false;
8416 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8417 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8418 STMT_VINFO_DATA_REF (res
) = NULL
;
8419 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8421 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8422 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8423 STMT_VINFO_DR_INIT (res
) = NULL
;
8424 STMT_VINFO_DR_STEP (res
) = NULL
;
8425 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
8427 if (gimple_code (stmt
) == GIMPLE_PHI
8428 && is_loop_header_bb_p (gimple_bb (stmt
)))
8429 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8431 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8433 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8434 STMT_SLP_TYPE (res
) = loop_vect
;
8435 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8437 GROUP_FIRST_ELEMENT (res
) = NULL
;
8438 GROUP_NEXT_ELEMENT (res
) = NULL
;
8439 GROUP_SIZE (res
) = 0;
8440 GROUP_STORE_COUNT (res
) = 0;
8441 GROUP_GAP (res
) = 0;
8442 GROUP_SAME_DR_STMT (res
) = NULL
;
8448 /* Create a hash table for stmt_vec_info. */
8451 init_stmt_vec_info_vec (void)
8453 gcc_assert (!stmt_vec_info_vec
.exists ());
8454 stmt_vec_info_vec
.create (50);
8458 /* Free hash table for stmt_vec_info. */
8461 free_stmt_vec_info_vec (void)
8465 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8467 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8468 gcc_assert (stmt_vec_info_vec
.exists ());
8469 stmt_vec_info_vec
.release ();
8473 /* Free stmt vectorization related info. */
8476 free_stmt_vec_info (gimple
*stmt
)
8478 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8483 /* Check if this statement has a related "pattern stmt"
8484 (introduced by the vectorizer during the pattern recognition
8485 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8487 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8489 stmt_vec_info patt_info
8490 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8493 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8494 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8495 gimple_set_bb (patt_stmt
, NULL
);
8496 tree lhs
= gimple_get_lhs (patt_stmt
);
8497 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8498 release_ssa_name (lhs
);
8501 gimple_stmt_iterator si
;
8502 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8504 gimple
*seq_stmt
= gsi_stmt (si
);
8505 gimple_set_bb (seq_stmt
, NULL
);
8506 lhs
= gimple_get_lhs (seq_stmt
);
8507 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8508 release_ssa_name (lhs
);
8509 free_stmt_vec_info (seq_stmt
);
8512 free_stmt_vec_info (patt_stmt
);
8516 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8517 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8518 set_vinfo_for_stmt (stmt
, NULL
);
8523 /* Function get_vectype_for_scalar_type_and_size.
8525 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8529 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8531 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8532 machine_mode simd_mode
;
8533 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8540 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8541 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8544 /* For vector types of elements whose mode precision doesn't
8545 match their types precision we use a element type of mode
8546 precision. The vectorization routines will have to make sure
8547 they support the proper result truncation/extension.
8548 We also make sure to build vector types with INTEGER_TYPE
8549 component type only. */
8550 if (INTEGRAL_TYPE_P (scalar_type
)
8551 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8552 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8553 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8554 TYPE_UNSIGNED (scalar_type
));
8556 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8557 When the component mode passes the above test simply use a type
8558 corresponding to that mode. The theory is that any use that
8559 would cause problems with this will disable vectorization anyway. */
8560 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8561 && !INTEGRAL_TYPE_P (scalar_type
))
8562 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8564 /* We can't build a vector type of elements with alignment bigger than
8566 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8567 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8568 TYPE_UNSIGNED (scalar_type
));
8570 /* If we felt back to using the mode fail if there was
8571 no scalar type for it. */
8572 if (scalar_type
== NULL_TREE
)
8575 /* If no size was supplied use the mode the target prefers. Otherwise
8576 lookup a vector mode of the specified size. */
8578 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8580 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
8581 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8585 vectype
= build_vector_type (scalar_type
, nunits
);
8587 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8588 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
8594 unsigned int current_vector_size
;
8596 /* Function get_vectype_for_scalar_type.
8598 Returns the vector type corresponding to SCALAR_TYPE as supported
8602 get_vectype_for_scalar_type (tree scalar_type
)
8605 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8606 current_vector_size
);
8608 && current_vector_size
== 0)
8609 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8613 /* Function get_mask_type_for_scalar_type.
8615 Returns the mask type corresponding to a result of comparison
8616 of vectors of specified SCALAR_TYPE as supported by target. */
8619 get_mask_type_for_scalar_type (tree scalar_type
)
8621 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
8626 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
8627 current_vector_size
);
8630 /* Function get_same_sized_vectype
8632 Returns a vector type corresponding to SCALAR_TYPE of size
8633 VECTOR_TYPE if supported by the target. */
8636 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8638 if (TREE_CODE (scalar_type
) == BOOLEAN_TYPE
)
8639 return build_same_sized_truth_vector_type (vector_type
);
8641 return get_vectype_for_scalar_type_and_size
8642 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8645 /* Function vect_is_simple_use.
8648 VINFO - the vect info of the loop or basic block that is being vectorized.
8649 OPERAND - operand in the loop or bb.
8651 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8652 DT - the type of definition
8654 Returns whether a stmt with OPERAND can be vectorized.
8655 For loops, supportable operands are constants, loop invariants, and operands
8656 that are defined by the current iteration of the loop. Unsupportable
8657 operands are those that are defined by a previous iteration of the loop (as
8658 is the case in reduction/induction computations).
8659 For basic blocks, supportable operands are constants and bb invariants.
8660 For now, operands defined outside the basic block are not supported. */
8663 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8664 gimple
**def_stmt
, enum vect_def_type
*dt
)
8667 *dt
= vect_unknown_def_type
;
8669 if (dump_enabled_p ())
8671 dump_printf_loc (MSG_NOTE
, vect_location
,
8672 "vect_is_simple_use: operand ");
8673 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8674 dump_printf (MSG_NOTE
, "\n");
8677 if (CONSTANT_CLASS_P (operand
))
8679 *dt
= vect_constant_def
;
8683 if (is_gimple_min_invariant (operand
))
8685 *dt
= vect_external_def
;
8689 if (TREE_CODE (operand
) != SSA_NAME
)
8691 if (dump_enabled_p ())
8692 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8697 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8699 *dt
= vect_external_def
;
8703 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8704 if (dump_enabled_p ())
8706 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8707 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8710 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
8711 *dt
= vect_external_def
;
8714 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8715 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8718 if (dump_enabled_p ())
8720 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8723 case vect_uninitialized_def
:
8724 dump_printf (MSG_NOTE
, "uninitialized\n");
8726 case vect_constant_def
:
8727 dump_printf (MSG_NOTE
, "constant\n");
8729 case vect_external_def
:
8730 dump_printf (MSG_NOTE
, "external\n");
8732 case vect_internal_def
:
8733 dump_printf (MSG_NOTE
, "internal\n");
8735 case vect_induction_def
:
8736 dump_printf (MSG_NOTE
, "induction\n");
8738 case vect_reduction_def
:
8739 dump_printf (MSG_NOTE
, "reduction\n");
8741 case vect_double_reduction_def
:
8742 dump_printf (MSG_NOTE
, "double reduction\n");
8744 case vect_nested_cycle
:
8745 dump_printf (MSG_NOTE
, "nested cycle\n");
8747 case vect_unknown_def_type
:
8748 dump_printf (MSG_NOTE
, "unknown\n");
8753 if (*dt
== vect_unknown_def_type
)
8755 if (dump_enabled_p ())
8756 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8757 "Unsupported pattern.\n");
8761 switch (gimple_code (*def_stmt
))
8768 if (dump_enabled_p ())
8769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8770 "unsupported defining stmt:\n");
8777 /* Function vect_is_simple_use.
8779 Same as vect_is_simple_use but also determines the vector operand
8780 type of OPERAND and stores it to *VECTYPE. If the definition of
8781 OPERAND is vect_uninitialized_def, vect_constant_def or
8782 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8783 is responsible to compute the best suited vector type for the
8787 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8788 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
8790 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
8793 /* Now get a vector type if the def is internal, otherwise supply
8794 NULL_TREE and leave it up to the caller to figure out a proper
8795 type for the use stmt. */
8796 if (*dt
== vect_internal_def
8797 || *dt
== vect_induction_def
8798 || *dt
== vect_reduction_def
8799 || *dt
== vect_double_reduction_def
8800 || *dt
== vect_nested_cycle
)
8802 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8804 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8805 && !STMT_VINFO_RELEVANT (stmt_info
)
8806 && !STMT_VINFO_LIVE_P (stmt_info
))
8807 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8809 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8810 gcc_assert (*vectype
!= NULL_TREE
);
8812 else if (*dt
== vect_uninitialized_def
8813 || *dt
== vect_constant_def
8814 || *dt
== vect_external_def
)
8815 *vectype
= NULL_TREE
;
8823 /* Function supportable_widening_operation
8825 Check whether an operation represented by the code CODE is a
8826 widening operation that is supported by the target platform in
8827 vector form (i.e., when operating on arguments of type VECTYPE_IN
8828 producing a result of type VECTYPE_OUT).
8830 Widening operations we currently support are NOP (CONVERT), FLOAT
8831 and WIDEN_MULT. This function checks if these operations are supported
8832 by the target platform either directly (via vector tree-codes), or via
8836 - CODE1 and CODE2 are codes of vector operations to be used when
8837 vectorizing the operation, if available.
8838 - MULTI_STEP_CVT determines the number of required intermediate steps in
8839 case of multi-step conversion (like char->short->int - in that case
8840 MULTI_STEP_CVT will be 1).
8841 - INTERM_TYPES contains the intermediate type required to perform the
8842 widening operation (short in the above example). */
8845 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
8846 tree vectype_out
, tree vectype_in
,
8847 enum tree_code
*code1
, enum tree_code
*code2
,
8848 int *multi_step_cvt
,
8849 vec
<tree
> *interm_types
)
8851 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8852 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8853 struct loop
*vect_loop
= NULL
;
8854 machine_mode vec_mode
;
8855 enum insn_code icode1
, icode2
;
8856 optab optab1
, optab2
;
8857 tree vectype
= vectype_in
;
8858 tree wide_vectype
= vectype_out
;
8859 enum tree_code c1
, c2
;
8861 tree prev_type
, intermediate_type
;
8862 machine_mode intermediate_mode
, prev_mode
;
8863 optab optab3
, optab4
;
8865 *multi_step_cvt
= 0;
8867 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8871 case WIDEN_MULT_EXPR
:
8872 /* The result of a vectorized widening operation usually requires
8873 two vectors (because the widened results do not fit into one vector).
8874 The generated vector results would normally be expected to be
8875 generated in the same order as in the original scalar computation,
8876 i.e. if 8 results are generated in each vector iteration, they are
8877 to be organized as follows:
8878 vect1: [res1,res2,res3,res4],
8879 vect2: [res5,res6,res7,res8].
8881 However, in the special case that the result of the widening
8882 operation is used in a reduction computation only, the order doesn't
8883 matter (because when vectorizing a reduction we change the order of
8884 the computation). Some targets can take advantage of this and
8885 generate more efficient code. For example, targets like Altivec,
8886 that support widen_mult using a sequence of {mult_even,mult_odd}
8887 generate the following vectors:
8888 vect1: [res1,res3,res5,res7],
8889 vect2: [res2,res4,res6,res8].
8891 When vectorizing outer-loops, we execute the inner-loop sequentially
8892 (each vectorized inner-loop iteration contributes to VF outer-loop
8893 iterations in parallel). We therefore don't allow to change the
8894 order of the computation in the inner-loop during outer-loop
8896 /* TODO: Another case in which order doesn't *really* matter is when we
8897 widen and then contract again, e.g. (short)((int)x * y >> 8).
8898 Normally, pack_trunc performs an even/odd permute, whereas the
8899 repack from an even/odd expansion would be an interleave, which
8900 would be significantly simpler for e.g. AVX2. */
8901 /* In any case, in order to avoid duplicating the code below, recurse
8902 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8903 are properly set up for the caller. If we fail, we'll continue with
8904 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8906 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8907 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8908 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8909 stmt
, vectype_out
, vectype_in
,
8910 code1
, code2
, multi_step_cvt
,
8913 /* Elements in a vector with vect_used_by_reduction property cannot
8914 be reordered if the use chain with this property does not have the
8915 same operation. One such an example is s += a * b, where elements
8916 in a and b cannot be reordered. Here we check if the vector defined
8917 by STMT is only directly used in the reduction statement. */
8918 tree lhs
= gimple_assign_lhs (stmt
);
8919 use_operand_p dummy
;
8921 stmt_vec_info use_stmt_info
= NULL
;
8922 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8923 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8924 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8927 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8928 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8941 case VEC_WIDEN_MULT_EVEN_EXPR
:
8942 /* Support the recursion induced just above. */
8943 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8944 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8947 case WIDEN_LSHIFT_EXPR
:
8948 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8949 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8953 c1
= VEC_UNPACK_LO_EXPR
;
8954 c2
= VEC_UNPACK_HI_EXPR
;
8958 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8959 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8962 case FIX_TRUNC_EXPR
:
8963 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8964 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8965 computing the operation. */
8972 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8975 if (code
== FIX_TRUNC_EXPR
)
8977 /* The signedness is determined from output operand. */
8978 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8979 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8983 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8984 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8987 if (!optab1
|| !optab2
)
8990 vec_mode
= TYPE_MODE (vectype
);
8991 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8992 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8998 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8999 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9000 /* For scalar masks we may have different boolean
9001 vector types having the same QImode. Thus we
9002 add additional check for elements number. */
9003 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9004 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9005 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9007 /* Check if it's a multi-step conversion that can be done using intermediate
9010 prev_type
= vectype
;
9011 prev_mode
= vec_mode
;
9013 if (!CONVERT_EXPR_CODE_P (code
))
9016 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9017 intermediate steps in promotion sequence. We try
9018 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9020 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9021 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9023 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9024 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9027 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9028 current_vector_size
);
9029 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9034 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9035 TYPE_UNSIGNED (prev_type
));
9037 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9038 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9040 if (!optab3
|| !optab4
9041 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9042 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9043 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9044 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9045 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9046 == CODE_FOR_nothing
)
9047 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9048 == CODE_FOR_nothing
))
9051 interm_types
->quick_push (intermediate_type
);
9052 (*multi_step_cvt
)++;
9054 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9055 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9056 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9057 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9058 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9060 prev_type
= intermediate_type
;
9061 prev_mode
= intermediate_mode
;
9064 interm_types
->release ();
9069 /* Function supportable_narrowing_operation
9071 Check whether an operation represented by the code CODE is a
9072 narrowing operation that is supported by the target platform in
9073 vector form (i.e., when operating on arguments of type VECTYPE_IN
9074 and producing a result of type VECTYPE_OUT).
9076 Narrowing operations we currently support are NOP (CONVERT) and
9077 FIX_TRUNC. This function checks if these operations are supported by
9078 the target platform directly via vector tree-codes.
9081 - CODE1 is the code of a vector operation to be used when
9082 vectorizing the operation, if available.
9083 - MULTI_STEP_CVT determines the number of required intermediate steps in
9084 case of multi-step conversion (like int->short->char - in that case
9085 MULTI_STEP_CVT will be 1).
9086 - INTERM_TYPES contains the intermediate type required to perform the
9087 narrowing operation (short in the above example). */
9090 supportable_narrowing_operation (enum tree_code code
,
9091 tree vectype_out
, tree vectype_in
,
9092 enum tree_code
*code1
, int *multi_step_cvt
,
9093 vec
<tree
> *interm_types
)
9095 machine_mode vec_mode
;
9096 enum insn_code icode1
;
9097 optab optab1
, interm_optab
;
9098 tree vectype
= vectype_in
;
9099 tree narrow_vectype
= vectype_out
;
9101 tree intermediate_type
, prev_type
;
9102 machine_mode intermediate_mode
, prev_mode
;
9106 *multi_step_cvt
= 0;
9110 c1
= VEC_PACK_TRUNC_EXPR
;
9113 case FIX_TRUNC_EXPR
:
9114 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9118 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9119 tree code and optabs used for computing the operation. */
9126 if (code
== FIX_TRUNC_EXPR
)
9127 /* The signedness is determined from output operand. */
9128 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9130 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9135 vec_mode
= TYPE_MODE (vectype
);
9136 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9141 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9142 /* For scalar masks we may have different boolean
9143 vector types having the same QImode. Thus we
9144 add additional check for elements number. */
9145 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9146 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9147 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9149 /* Check if it's a multi-step conversion that can be done using intermediate
9151 prev_mode
= vec_mode
;
9152 prev_type
= vectype
;
9153 if (code
== FIX_TRUNC_EXPR
)
9154 uns
= TYPE_UNSIGNED (vectype_out
);
9156 uns
= TYPE_UNSIGNED (vectype
);
9158 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9159 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9160 costly than signed. */
9161 if (code
== FIX_TRUNC_EXPR
&& uns
)
9163 enum insn_code icode2
;
9166 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9168 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9169 if (interm_optab
!= unknown_optab
9170 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9171 && insn_data
[icode1
].operand
[0].mode
9172 == insn_data
[icode2
].operand
[0].mode
)
9175 optab1
= interm_optab
;
9180 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9181 intermediate steps in promotion sequence. We try
9182 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9183 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9184 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9186 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9187 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9190 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9191 current_vector_size
);
9192 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9197 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9199 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9202 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9203 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9204 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9205 == CODE_FOR_nothing
))
9208 interm_types
->quick_push (intermediate_type
);
9209 (*multi_step_cvt
)++;
9211 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9212 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9213 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9214 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9216 prev_mode
= intermediate_mode
;
9217 prev_type
= intermediate_type
;
9218 optab1
= interm_optab
;
9221 interm_types
->release ();