/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
/* Return the vectorized type for the given statement.  */

static tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}
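
/* For example, during analysis a caller typically records costs into a
   cost vector for later processing, roughly along these lines (the
   variable names below are only illustrative):

       stmt_vector_for_cost cost_vec;
       cost_vec.create (2);
       unsigned estimate
	 = record_stmt_cost (&cost_vec, ncopies, vector_stmt,
			     stmt_info, 0, vect_body);

   whereas passing a NULL cost vector hands the cost straight to the
   target's cost model through add_stmt_cost.  */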
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
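
/* As a rough illustration, in a loop such as

       for (i = 0; i < n; i++)
	 sum += a[i];
       ... = sum;

   the definition of SUM is "live" because SUM is used after the loop,
   while the increment of I on its own is neither live nor relevant here,
   since loop control and address computation are handled separately by
   the vectorizer.  */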
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
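
/* For instance, in

       x = a[i];

   the use of I only feeds the address computation of the data reference,
   so process_use leaves the relevance of I's defining statement unchanged
   (case 1 above), whereas a use of X's value in a later statement does
   propagate relevance back to this load.  */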
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");
	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");
	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       enum vect_def_type dt, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE)
    /* N scalar stores plus extracting the elements.  */
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     scalar_store, stmt_info, 0, vect_body);
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
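
/* As a rough worked example, a contiguous-permute store group of
   GROUP_SIZE = 4 with NCOPIES = 1 is charged ceil_log2 (4) * 4 = 8
   vec_perm operations on top of the per-copy store cost computed by
   vect_get_store_cost; the actual numbers depend on the target's
   builtin_vectorization_cost hook.  */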
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");
	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");
	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
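
/* A typical use is materializing an invariant operand, for example

       tree vec_cst = vect_init_vector (stmt, op, vectype, NULL);

   which, for a scalar OP, first builds the splat { OP, OP, ..., OP } and,
   since GSI is NULL, emits the initialization on the loop preheader edge
   via vect_init_vector_1.  */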
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
			VS1.1:  vx.1 = memref1      VS1.2
			VS1.2:  vx.2 = memref2      VS1.3
			VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
			VSnew.1:  vz1 = vx.1 + ...  VSnew.2
			VSnew.2:  vz2 = vx.2 + ...  VSnew.3
			VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
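
/* In other words, the first copy of an operand comes from
   vect_get_vec_def_for_operand and each further copy is obtained by
   chaining through the related stmts, roughly like

       vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
       for (j = 1; j < ncopies; j++)
	 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);

   (a simplified sketch of the pattern used by the vectorizable_*
   routines).  */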
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);
/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree step;
  if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
  return tree_int_cst_compare (step, size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
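
/* For a four-element vector this builds the selector { 3, 2, 1, 0 },
   i.e. the permutation that swaps the vector end for end, provided
   can_vec_perm_p says the target can perform it.  */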
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
			   vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
			   && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
	{
	  /* Try to use consecutive accesses of GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (nunits % group_size == 0)
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* If the access is aligned an overrun is fine.  */
	  if (overrun_p
	      && aligned_access_p
		   (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
	    overrun_p = false;
	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  *memory_access_type = VMAT_CONTIGUOUS;
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* If the access is aligned an overrun is fine, but only if the
	 overrun is not inside an unused vector (if the gap is as large
	 or larger than a vector).  */
      if (would_overrun_p
	  && gap < nunits
	  && aligned_access_p
	       (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
	would_overrun_p = false;
      if (!STMT_VINFO_STRIDED_P (stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (stmt) > 0)
	{
	  /* First try using LOAD/STORE_LANES.  */
	  if (vls_type == VLS_LOAD
	      ? vect_load_lanes_supported (vectype, group_size)
	      : vect_store_lanes_supported (vectype, group_size))
	    {
	      *memory_access_type = VMAT_LOAD_STORE_LANES;
	      overrun_p = would_overrun_p;
	    }

	  /* If that fails, try using permuting loads.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_grouped_load_supported (vectype, single_element_p,
						 group_size)
		  : vect_grouped_store_supported (vectype, group_size)))
	    {
	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
	      overrun_p = would_overrun_p;
	    }
	}
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
	{
	  gcc_assert (gimple_assign_single_p (next_stmt));
	  tree op = gimple_assign_rhs1 (next_stmt);
	  gimple *def_stmt;
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
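
/* So, for example, a supported-alignment load from a[i] with i counting
   down is classified as VMAT_CONTIGUOUS_REVERSE (load contiguously, then
   permute with the mask from perm_mask_for_reverse), while an invariant
   value stored through a negative step needs no permute and becomes
   VMAT_CONTIGUOUS_DOWN.  */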
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp,
		     vec_load_store_type vls_type, unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      gimple *def_stmt;
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
				      memory_access_type))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
	*memory_access_type = get_negative_load_store_type
	  (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
	{
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	}
      else
	*memory_access_type = VMAT_CONTIGUOUS;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
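/* Illustrative sketch only (not taken from the original sources): a
   conditional access that if-conversion has turned into MASK_LOAD or
   MASK_STORE internal calls, e.g.

       for (int i = 0; i < n; i++)
         if (c[i])
           a[i] = b[i];

   is handled here by loading B and storing A under a vector mask
   computed from C, provided the target supports masked loads and
   stores for the chosen vector mode (can_vec_mask_load_store_p).  */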
2000 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2001 gimple
**vec_stmt
, slp_tree slp_node
)
2003 tree vec_dest
= NULL
;
2004 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2005 stmt_vec_info prev_stmt_info
;
2006 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2007 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2008 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2009 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2010 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2011 tree rhs_vectype
= NULL_TREE
;
2016 tree dataref_ptr
= NULL_TREE
;
2018 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2022 gather_scatter_info gs_info
;
2023 vec_load_store_type vls_type
;
2026 enum vect_def_type dt
;
2028 if (slp_node
!= NULL
)
2031 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2032 gcc_assert (ncopies
>= 1);
2034 mask
= gimple_call_arg (stmt
, 2);
2036 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2039 /* FORNOW. This restriction should be relaxed. */
2040 if (nested_in_vect_loop
&& ncopies
> 1)
2042 if (dump_enabled_p ())
2043 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2044 "multiple types in nested loop.");
2048 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2051 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2055 if (!STMT_VINFO_DATA_REF (stmt_info
))
2058 elem_type
= TREE_TYPE (vectype
);
2060 if (TREE_CODE (mask
) != SSA_NAME
)
2063 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2067 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2069 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2070 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2073 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2075 tree rhs
= gimple_call_arg (stmt
, 3);
2076 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2078 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2079 vls_type
= VLS_STORE_INVARIANT
;
2081 vls_type
= VLS_STORE
;
2084 vls_type
= VLS_LOAD
;
2086 vect_memory_access_type memory_access_type
;
2087 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2088 &memory_access_type
, &gs_info
))
2091 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2093 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2095 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2096 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2098 if (dump_enabled_p ())
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2100 "masked gather with integer mask not supported.");
2104 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2106 if (dump_enabled_p ())
2107 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2108 "unsupported access type for masked %s.\n",
2109 vls_type
== VLS_LOAD
? "load" : "store");
2112 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2113 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2114 TYPE_MODE (mask_vectype
),
2115 vls_type
== VLS_LOAD
)
2117 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2120 if (!vec_stmt
) /* transformation not required. */
2122 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2123 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2124 if (vls_type
== VLS_LOAD
)
2125 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2128 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2129 dt
, NULL
, NULL
, NULL
);
2132 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2136 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2138 tree vec_oprnd0
= NULL_TREE
, op
;
2139 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2140 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2141 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2142 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2143 tree mask_perm_mask
= NULL_TREE
;
2144 edge pe
= loop_preheader_edge (loop
);
2147 enum { NARROW
, NONE
, WIDEN
} modifier
;
2148 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2150 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2151 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2152 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2153 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2154 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2155 scaletype
= TREE_VALUE (arglist
);
2156 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2157 && types_compatible_p (srctype
, masktype
));
2159 if (nunits
== gather_off_nunits
)
2161 else if (nunits
== gather_off_nunits
/ 2)
2163 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
2166 for (i
= 0; i
< gather_off_nunits
; ++i
)
2167 sel
[i
] = i
| nunits
;
2169 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2171 else if (nunits
== gather_off_nunits
* 2)
2173 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
2176 for (i
= 0; i
< nunits
; ++i
)
2177 sel
[i
] = i
< gather_off_nunits
2178 ? i
: i
+ nunits
- gather_off_nunits
;
2180 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2182 for (i
= 0; i
< nunits
; ++i
)
2183 sel
[i
] = i
| gather_off_nunits
;
2184 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2189 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2191 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2192 if (!is_gimple_min_invariant (ptr
))
2194 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2195 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2196 gcc_assert (!new_bb
);
2199 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2201 prev_stmt_info
= NULL
;
2202 for (j
= 0; j
< ncopies
; ++j
)
2204 if (modifier
== WIDEN
&& (j
& 1))
2205 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2206 perm_mask
, stmt
, gsi
);
2209 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2212 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2214 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2216 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2217 == TYPE_VECTOR_SUBPARTS (idxtype
));
2218 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2219 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2221 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2222 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2226 if (mask_perm_mask
&& (j
& 1))
2227 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2228 mask_perm_mask
, stmt
, gsi
);
2232 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2235 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2236 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2240 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2242 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2243 == TYPE_VECTOR_SUBPARTS (masktype
));
2244 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2245 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2247 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2248 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2254 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2257 if (!useless_type_conversion_p (vectype
, rettype
))
2259 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2260 == TYPE_VECTOR_SUBPARTS (rettype
));
2261 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2262 gimple_call_set_lhs (new_stmt
, op
);
2263 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2264 var
= make_ssa_name (vec_dest
);
2265 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2266 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2270 var
= make_ssa_name (vec_dest
, new_stmt
);
2271 gimple_call_set_lhs (new_stmt
, var
);
2274 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2276 if (modifier
== NARROW
)
2283 var
= permute_vec_elements (prev_res
, var
,
2284 perm_mask
, stmt
, gsi
);
2285 new_stmt
= SSA_NAME_DEF_STMT (var
);
2288 if (prev_stmt_info
== NULL
)
2289 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2291 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2292 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL if it is dead.  */
2297 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2299 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2300 stmt_info
= vinfo_for_stmt (stmt
);
2302 tree lhs
= gimple_call_lhs (stmt
);
2303 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2304 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2305 set_vinfo_for_stmt (stmt
, NULL
);
2306 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2307 gsi_replace (gsi
, new_stmt
, true);
2310 else if (vls_type
!= VLS_LOAD
)
2312 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2313 prev_stmt_info
= NULL
;
2314 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2315 for (i
= 0; i
< ncopies
; i
++)
2317 unsigned align
, misalign
;
2321 tree rhs
= gimple_call_arg (stmt
, 3);
2322 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2323 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
      /* We should have caught mismatched types earlier.  */
2325 gcc_assert (useless_type_conversion_p (vectype
,
2326 TREE_TYPE (vec_rhs
)));
2327 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2328 NULL_TREE
, &dummy
, gsi
,
2329 &ptr_incr
, false, &inv_p
);
2330 gcc_assert (!inv_p
);
2334 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2335 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2336 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2337 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2338 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2339 TYPE_SIZE_UNIT (vectype
));
2342 align
= TYPE_ALIGN_UNIT (vectype
);
2343 if (aligned_access_p (dr
))
2345 else if (DR_MISALIGNMENT (dr
) == -1)
2347 align
= TYPE_ALIGN_UNIT (elem_type
);
2351 misalign
= DR_MISALIGNMENT (dr
);
2352 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2354 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2355 misalign
? least_bit_hwi (misalign
) : align
);
2357 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2358 ptr
, vec_mask
, vec_rhs
);
2359 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2361 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2363 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2364 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2369 tree vec_mask
= NULL_TREE
;
2370 prev_stmt_info
= NULL
;
2371 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2372 for (i
= 0; i
< ncopies
; i
++)
2374 unsigned align
, misalign
;
2378 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2379 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2380 NULL_TREE
, &dummy
, gsi
,
2381 &ptr_incr
, false, &inv_p
);
2382 gcc_assert (!inv_p
);
2386 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2387 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2388 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2389 TYPE_SIZE_UNIT (vectype
));
2392 align
= TYPE_ALIGN_UNIT (vectype
);
2393 if (aligned_access_p (dr
))
2395 else if (DR_MISALIGNMENT (dr
) == -1)
2397 align
= TYPE_ALIGN_UNIT (elem_type
);
2401 misalign
= DR_MISALIGNMENT (dr
);
2402 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2404 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2405 misalign
? least_bit_hwi (misalign
) : align
);
2407 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2409 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2410 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2412 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2414 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2415 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2419 if (vls_type
== VLS_LOAD
)
    /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
       from the IL if it is dead.  */
2423 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2425 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2426 stmt_info
= vinfo_for_stmt (stmt
);
2428 tree lhs
= gimple_call_lhs (stmt
);
2429 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2430 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2431 set_vinfo_for_stmt (stmt
, NULL
);
2432 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2433 gsi_replace (gsi
, new_stmt
, true);
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
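/* Illustrative sketch only (not part of the original sources): a loop
   such as

       for (int i = 0; i < n; i++)
         a[i] = __builtin_bswap32 (b[i]);

   can be vectorized by reinterpreting each vector of 32-bit words as a
   vector of bytes, applying a VEC_PERM_EXPR that reverses the bytes
   within each word, and reinterpreting the result back, which is what
   the code below does with a constant permutation mask.  */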
2442 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2443 gimple
**vec_stmt
, slp_tree slp_node
,
2444 tree vectype_in
, enum vect_def_type
*dt
)
2447 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2448 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2449 unsigned ncopies
, nunits
;
2451 op
= gimple_call_arg (stmt
, 0);
2452 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2453 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
2461 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2463 gcc_assert (ncopies
>= 1);
2465 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2470 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype
));
2471 unsigned char *elt
= elts
;
2472 unsigned word_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
) / nunits
;
2473 for (unsigned i
= 0; i
< nunits
; ++i
)
2474 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2475 *elt
++ = (i
+ 1) * word_bytes
- j
- 1;
2477 if (! can_vec_perm_p (TYPE_MODE (char_vectype
), false, elts
))
2482 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2486 if (! PURE_SLP_STMT (stmt_info
))
2488 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2489 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2490 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2491 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2496 tree
*telts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (char_vectype
));
2497 for (unsigned i
= 0; i
< TYPE_VECTOR_SUBPARTS (char_vectype
); ++i
)
2498 telts
[i
] = build_int_cst (char_type_node
, elts
[i
]);
2499 tree bswap_vconst
= build_vector (char_vectype
, telts
);
2502 vec
<tree
> vec_oprnds
= vNULL
;
2503 gimple
*new_stmt
= NULL
;
2504 stmt_vec_info prev_stmt_info
= NULL
;
2505 for (unsigned j
= 0; j
< ncopies
; j
++)
2509 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2511 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
      /* Arguments are ready.  Create the new vector stmt.  */
2516 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2518 tree tem
= make_ssa_name (char_vectype
);
2519 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2520 char_vectype
, vop
));
2521 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2522 tree tem2
= make_ssa_name (char_vectype
);
2523 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2524 tem
, tem
, bswap_vconst
);
2525 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2526 tem
= make_ssa_name (vectype
);
2527 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2529 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2531 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2538 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2540 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2542 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2545 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
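/* Illustrative sketch only (not part of the original sources): with
   4 x int (VECTYPE_IN) narrowed to 8 x short (VECTYPE_OUT) on 128-bit
   vectors, a single VEC_PACK_TRUNC_EXPR combines two input vectors
   into one output vector, so the narrowing is possible in one step:

       v8hi_out = VEC_PACK_TRUNC_EXPR <v4si_a, v4si_b>;

   A conversion that would need an intermediate type (say int -> char)
   is rejected because MULTI_STEP_CVT would be non-zero.  */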
static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
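/* Illustrative sketch only (not part of the original sources): a call
   in the loop body such as

       for (int i = 0; i < n; i++)
         a[i] = sqrtf (b[i]);

   is vectorized either through an internal function (e.g. IFN_SQRT)
   when the target provides a matching optab, or through a
   target-specific built-in returned by
   targetm.vectorize.builtin_vectorized_function.  */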
2583 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2590 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2591 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2592 tree vectype_out
, vectype_in
;
2595 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2596 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2597 vec_info
*vinfo
= stmt_info
->vinfo
;
2598 tree fndecl
, new_temp
, rhs_type
;
2600 enum vect_def_type dt
[3]
2601 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2602 gimple
*new_stmt
= NULL
;
2604 vec
<tree
> vargs
= vNULL
;
2605 enum { NARROW
, NONE
, WIDEN
} modifier
;
2609 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2612 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2616 /* Is GS a vectorizable call? */
2617 stmt
= dyn_cast
<gcall
*> (gs
);
2621 if (gimple_call_internal_p (stmt
)
2622 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2623 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2624 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2627 if (gimple_call_lhs (stmt
) == NULL_TREE
2628 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2631 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2633 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2635 /* Process function arguments. */
2636 rhs_type
= NULL_TREE
;
2637 vectype_in
= NULL_TREE
;
2638 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
2643 if (nargs
== 0 || nargs
> 3)
2646 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2647 if (gimple_call_internal_p (stmt
)
2648 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2651 rhs_type
= unsigned_type_node
;
2654 for (i
= 0; i
< nargs
; i
++)
2658 op
= gimple_call_arg (stmt
, i
);
2660 /* We can only handle calls with arguments of the same type. */
2662 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2664 if (dump_enabled_p ())
2665 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2666 "argument types differ.\n");
2670 rhs_type
= TREE_TYPE (op
);
2672 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2674 if (dump_enabled_p ())
2675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2676 "use not simple.\n");
2681 vectype_in
= opvectype
;
2683 && opvectype
!= vectype_in
)
2685 if (dump_enabled_p ())
2686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2687 "argument vector types differ.\n");
2691 /* If all arguments are external or constant defs use a vector type with
2692 the same size as the output vector type. */
2694 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2696 gcc_assert (vectype_in
);
2699 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2702 "no vectype for scalar type ");
2703 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2704 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2711 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2712 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2713 if (nunits_in
== nunits_out
/ 2)
2715 else if (nunits_out
== nunits_in
)
2717 else if (nunits_out
== nunits_in
/ 2)
2722 /* We only handle functions that do not read or clobber memory. */
2723 if (gimple_vuse (stmt
))
2725 if (dump_enabled_p ())
2726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2727 "function reads from or writes to memory.\n");
2731 /* For now, we only vectorize functions if a target specific builtin
2732 is available. TODO -- in some cases, it might be profitable to
2733 insert the calls for pieces of the vector, in order to be able
2734 to vectorize other operations in the loop. */
2736 internal_fn ifn
= IFN_LAST
;
2737 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2738 tree callee
= gimple_call_fndecl (stmt
);
2740 /* First try using an internal function. */
2741 tree_code convert_code
= ERROR_MARK
;
2743 && (modifier
== NONE
2744 || (modifier
== NARROW
2745 && simple_integer_narrowing (vectype_out
, vectype_in
,
2747 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2750 /* If that fails, try asking for a target-specific built-in function. */
2751 if (ifn
== IFN_LAST
)
2753 if (cfn
!= CFN_LAST
)
2754 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2755 (cfn
, vectype_out
, vectype_in
);
2757 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2758 (callee
, vectype_out
, vectype_in
);
2761 if (ifn
== IFN_LAST
&& !fndecl
)
2763 if (cfn
== CFN_GOMP_SIMD_LANE
2766 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2767 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2768 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2769 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2771 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2772 { 0, 1, 2, ... vf - 1 } vector. */
2773 gcc_assert (nargs
== 0);
2775 else if (modifier
== NONE
2776 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2777 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2778 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2779 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2785 "function is not vectorizable.\n");
2792 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2793 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2795 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2797 /* Sanity check: make sure that at least one copy of the vectorized stmt
2798 needs to be generated. */
2799 gcc_assert (ncopies
>= 1);
2801 if (!vec_stmt
) /* transformation not required. */
2803 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2804 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2807 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2808 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2809 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2810 vec_promote_demote
, stmt_info
, 0, vect_body
);
2817 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2821 scalar_dest
= gimple_call_lhs (stmt
);
2822 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2824 prev_stmt_info
= NULL
;
2825 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2827 tree prev_res
= NULL_TREE
;
2828 for (j
= 0; j
< ncopies
; ++j
)
2830 /* Build argument list for the vectorized call. */
2832 vargs
.create (nargs
);
2838 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2839 vec
<tree
> vec_oprnds0
;
2841 for (i
= 0; i
< nargs
; i
++)
2842 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2843 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2844 vec_oprnds0
= vec_defs
[0];
2846 /* Arguments are ready. Create the new vector stmt. */
2847 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2850 for (k
= 0; k
< nargs
; k
++)
2852 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2853 vargs
[k
] = vec_oprndsk
[i
];
2855 if (modifier
== NARROW
)
2857 tree half_res
= make_ssa_name (vectype_in
);
2858 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2859 gimple_call_set_lhs (new_stmt
, half_res
);
2860 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2863 prev_res
= half_res
;
2866 new_temp
= make_ssa_name (vec_dest
);
2867 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2868 prev_res
, half_res
);
2872 if (ifn
!= IFN_LAST
)
2873 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2875 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2876 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2877 gimple_call_set_lhs (new_stmt
, new_temp
);
2879 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2880 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2883 for (i
= 0; i
< nargs
; i
++)
2885 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2886 vec_oprndsi
.release ();
2891 for (i
= 0; i
< nargs
; i
++)
2893 op
= gimple_call_arg (stmt
, i
);
2896 = vect_get_vec_def_for_operand (op
, stmt
);
2899 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2901 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2904 vargs
.quick_push (vec_oprnd0
);
2907 if (gimple_call_internal_p (stmt
)
2908 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2910 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2912 for (k
= 0; k
< nunits_out
; ++k
)
2913 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2914 tree cst
= build_vector (vectype_out
, v
);
2916 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2917 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2918 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2919 new_temp
= make_ssa_name (vec_dest
);
2920 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2922 else if (modifier
== NARROW
)
2924 tree half_res
= make_ssa_name (vectype_in
);
2925 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2926 gimple_call_set_lhs (new_stmt
, half_res
);
2927 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2930 prev_res
= half_res
;
2933 new_temp
= make_ssa_name (vec_dest
);
2934 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2935 prev_res
, half_res
);
2939 if (ifn
!= IFN_LAST
)
2940 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2942 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2943 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2944 gimple_call_set_lhs (new_stmt
, new_temp
);
2946 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2948 if (j
== (modifier
== NARROW
? 1 : 0))
2949 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2951 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2953 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2956 else if (modifier
== NARROW
)
2958 for (j
= 0; j
< ncopies
; ++j
)
2960 /* Build argument list for the vectorized call. */
2962 vargs
.create (nargs
* 2);
2968 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2969 vec
<tree
> vec_oprnds0
;
2971 for (i
= 0; i
< nargs
; i
++)
2972 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2973 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2974 vec_oprnds0
= vec_defs
[0];
2976 /* Arguments are ready. Create the new vector stmt. */
2977 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2981 for (k
= 0; k
< nargs
; k
++)
2983 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2984 vargs
.quick_push (vec_oprndsk
[i
]);
2985 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2987 if (ifn
!= IFN_LAST
)
2988 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2990 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2991 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2992 gimple_call_set_lhs (new_stmt
, new_temp
);
2993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2994 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2997 for (i
= 0; i
< nargs
; i
++)
2999 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3000 vec_oprndsi
.release ();
3005 for (i
= 0; i
< nargs
; i
++)
3007 op
= gimple_call_arg (stmt
, i
);
3011 = vect_get_vec_def_for_operand (op
, stmt
);
3013 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3017 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3019 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3021 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3024 vargs
.quick_push (vec_oprnd0
);
3025 vargs
.quick_push (vec_oprnd1
);
3028 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3029 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3030 gimple_call_set_lhs (new_stmt
, new_temp
);
3031 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3034 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3036 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3038 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3041 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3044 /* No current target implements this case. */
3049 /* The call in STMT might prevent it from being removed in dce.
3050 We however cannot remove it here, due to the way the ssa name
3051 it defines is mapped to the new definition. So just replace
3052 rhs of the statement with something harmless. */
3057 type
= TREE_TYPE (scalar_dest
);
3058 if (is_pattern_stmt_p (stmt_info
))
3059 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3061 lhs
= gimple_call_lhs (stmt
);
3063 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3064 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3065 set_vinfo_for_stmt (stmt
, NULL
);
3066 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3067 gsi_replace (gsi
, new_stmt
, false);
3073 struct simd_call_arg_info
3077 HOST_WIDE_INT linear_step
;
3078 enum vect_def_type dt
;
3080 bool simd_lane_linear
;
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */
3088 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3089 struct simd_call_arg_info
*arginfo
)
3091 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3093 if (!is_gimple_assign (def_stmt
)
3094 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3095 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3098 tree base
= gimple_assign_rhs1 (def_stmt
);
3099 HOST_WIDE_INT linear_step
= 0;
3100 tree v
= gimple_assign_rhs2 (def_stmt
);
3101 while (TREE_CODE (v
) == SSA_NAME
)
3104 def_stmt
= SSA_NAME_DEF_STMT (v
);
3105 if (is_gimple_assign (def_stmt
))
3106 switch (gimple_assign_rhs_code (def_stmt
))
3109 t
= gimple_assign_rhs2 (def_stmt
);
3110 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3112 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3113 v
= gimple_assign_rhs1 (def_stmt
);
3116 t
= gimple_assign_rhs2 (def_stmt
);
3117 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3119 linear_step
= tree_to_shwi (t
);
3120 v
= gimple_assign_rhs1 (def_stmt
);
3123 t
= gimple_assign_rhs1 (def_stmt
);
3124 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3125 || (TYPE_PRECISION (TREE_TYPE (v
))
3126 < TYPE_PRECISION (TREE_TYPE (t
))))
3135 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3137 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3138 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3143 arginfo
->linear_step
= linear_step
;
3145 arginfo
->simd_lane_linear
= true;
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
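/* Illustrative sketch only (not part of the original sources): given a
   function with simd clones, e.g.

       #pragma omp declare simd uniform(k) notinbranch
       float foo (float x, int k);

       for (int i = 0; i < n; i++)
         a[i] = foo (b[i], 42);

   the scalar call is replaced by a call to the clone that best matches
   the vectorization factor; B becomes a vector argument, while the
   loop-invariant 42 can be passed to the uniform parameter of the
   clone.  */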
3160 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3161 gimple
**vec_stmt
, slp_tree slp_node
)
3166 tree vec_oprnd0
= NULL_TREE
;
3167 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3169 unsigned int nunits
;
3170 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3171 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3172 vec_info
*vinfo
= stmt_info
->vinfo
;
3173 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3174 tree fndecl
, new_temp
;
3176 gimple
*new_stmt
= NULL
;
3178 auto_vec
<simd_call_arg_info
> arginfo
;
3179 vec
<tree
> vargs
= vNULL
;
3181 tree lhs
, rtype
, ratype
;
3182 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
3184 /* Is STMT a vectorizable call? */
3185 if (!is_gimple_call (stmt
))
3188 fndecl
= gimple_call_fndecl (stmt
);
3189 if (fndecl
== NULL_TREE
)
3192 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3193 if (node
== NULL
|| node
->simd_clones
== NULL
)
3196 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3199 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3203 if (gimple_call_lhs (stmt
)
3204 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3207 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3209 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3211 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3218 /* Process function arguments. */
3219 nargs
= gimple_call_num_args (stmt
);
3221 /* Bail out if the function has zero arguments. */
3225 arginfo
.reserve (nargs
, true);
3227 for (i
= 0; i
< nargs
; i
++)
3229 simd_call_arg_info thisarginfo
;
3232 thisarginfo
.linear_step
= 0;
3233 thisarginfo
.align
= 0;
3234 thisarginfo
.op
= NULL_TREE
;
3235 thisarginfo
.simd_lane_linear
= false;
3237 op
= gimple_call_arg (stmt
, i
);
3238 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3239 &thisarginfo
.vectype
)
3240 || thisarginfo
.dt
== vect_uninitialized_def
)
3242 if (dump_enabled_p ())
3243 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3244 "use not simple.\n");
3248 if (thisarginfo
.dt
== vect_constant_def
3249 || thisarginfo
.dt
== vect_external_def
)
3250 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3252 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3254 /* For linear arguments, the analyze phase should have saved
3255 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3256 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3257 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3259 gcc_assert (vec_stmt
);
3260 thisarginfo
.linear_step
3261 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3263 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3264 thisarginfo
.simd_lane_linear
3265 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3266 == boolean_true_node
);
3267 /* If loop has been peeled for alignment, we need to adjust it. */
3268 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3269 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3270 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3272 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3273 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3274 tree opt
= TREE_TYPE (thisarginfo
.op
);
3275 bias
= fold_convert (TREE_TYPE (step
), bias
);
3276 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3278 = fold_build2 (POINTER_TYPE_P (opt
)
3279 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3280 thisarginfo
.op
, bias
);
3284 && thisarginfo
.dt
!= vect_constant_def
3285 && thisarginfo
.dt
!= vect_external_def
3287 && TREE_CODE (op
) == SSA_NAME
3288 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3290 && tree_fits_shwi_p (iv
.step
))
3292 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3293 thisarginfo
.op
= iv
.base
;
3295 else if ((thisarginfo
.dt
== vect_constant_def
3296 || thisarginfo
.dt
== vect_external_def
)
3297 && POINTER_TYPE_P (TREE_TYPE (op
)))
3298 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3299 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3301 if (POINTER_TYPE_P (TREE_TYPE (op
))
3302 && !thisarginfo
.linear_step
3304 && thisarginfo
.dt
!= vect_constant_def
3305 && thisarginfo
.dt
!= vect_external_def
3308 && TREE_CODE (op
) == SSA_NAME
)
3309 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3311 arginfo
.quick_push (thisarginfo
);
3314 unsigned int badness
= 0;
3315 struct cgraph_node
*bestn
= NULL
;
3316 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3317 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3319 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3320 n
= n
->simdclone
->next_clone
)
3322 unsigned int this_badness
= 0;
3323 if (n
->simdclone
->simdlen
3324 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3325 || n
->simdclone
->nargs
!= nargs
)
3327 if (n
->simdclone
->simdlen
3328 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3329 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3330 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3331 if (n
->simdclone
->inbranch
)
3332 this_badness
+= 2048;
3333 int target_badness
= targetm
.simd_clone
.usable (n
);
3334 if (target_badness
< 0)
3336 this_badness
+= target_badness
* 512;
3337 /* FORNOW: Have to add code to add the mask argument. */
3338 if (n
->simdclone
->inbranch
)
3340 for (i
= 0; i
< nargs
; i
++)
3342 switch (n
->simdclone
->args
[i
].arg_type
)
3344 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3345 if (!useless_type_conversion_p
3346 (n
->simdclone
->args
[i
].orig_type
,
3347 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3349 else if (arginfo
[i
].dt
== vect_constant_def
3350 || arginfo
[i
].dt
== vect_external_def
3351 || arginfo
[i
].linear_step
)
3354 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3355 if (arginfo
[i
].dt
!= vect_constant_def
3356 && arginfo
[i
].dt
!= vect_external_def
)
3359 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3360 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3361 if (arginfo
[i
].dt
== vect_constant_def
3362 || arginfo
[i
].dt
== vect_external_def
3363 || (arginfo
[i
].linear_step
3364 != n
->simdclone
->args
[i
].linear_step
))
3367 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3368 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3369 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3370 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3371 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3372 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3376 case SIMD_CLONE_ARG_TYPE_MASK
:
3379 if (i
== (size_t) -1)
3381 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3386 if (arginfo
[i
].align
)
3387 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3388 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3390 if (i
== (size_t) -1)
3392 if (bestn
== NULL
|| this_badness
< badness
)
3395 badness
= this_badness
;
3402 for (i
= 0; i
< nargs
; i
++)
3403 if ((arginfo
[i
].dt
== vect_constant_def
3404 || arginfo
[i
].dt
== vect_external_def
)
3405 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3408 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3410 if (arginfo
[i
].vectype
== NULL
3411 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3412 > bestn
->simdclone
->simdlen
))
3416 fndecl
= bestn
->decl
;
3417 nunits
= bestn
->simdclone
->simdlen
;
3418 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3420 /* If the function isn't const, only allow it in simd loops where user
3421 has asserted that at least nunits consecutive iterations can be
3422 performed using SIMD instructions. */
3423 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3424 && gimple_vuse (stmt
))
3427 /* Sanity check: make sure that at least one copy of the vectorized stmt
3428 needs to be generated. */
3429 gcc_assert (ncopies
>= 1);
3431 if (!vec_stmt
) /* transformation not required. */
3433 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3434 for (i
= 0; i
< nargs
; i
++)
3435 if ((bestn
->simdclone
->args
[i
].arg_type
3436 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3437 || (bestn
->simdclone
->args
[i
].arg_type
3438 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3440 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3442 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3443 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3444 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3445 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3446 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3447 tree sll
= arginfo
[i
].simd_lane_linear
3448 ? boolean_true_node
: boolean_false_node
;
3449 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3451 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_NOTE
, vect_location
,
3454 "=== vectorizable_simd_clone_call ===\n");
3455 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3461 if (dump_enabled_p ())
3462 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3465 scalar_dest
= gimple_call_lhs (stmt
);
3466 vec_dest
= NULL_TREE
;
3471 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3472 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3473 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3476 rtype
= TREE_TYPE (ratype
);
3480 prev_stmt_info
= NULL
;
3481 for (j
= 0; j
< ncopies
; ++j
)
3483 /* Build argument list for the vectorized call. */
3485 vargs
.create (nargs
);
3489 for (i
= 0; i
< nargs
; i
++)
3491 unsigned int k
, l
, m
, o
;
3493 op
= gimple_call_arg (stmt
, i
);
3494 switch (bestn
->simdclone
->args
[i
].arg_type
)
3496 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3497 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3498 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3499 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3501 if (TYPE_VECTOR_SUBPARTS (atype
)
3502 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3504 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3505 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3506 / TYPE_VECTOR_SUBPARTS (atype
));
3507 gcc_assert ((k
& (k
- 1)) == 0);
3510 = vect_get_vec_def_for_operand (op
, stmt
);
3513 vec_oprnd0
= arginfo
[i
].op
;
3514 if ((m
& (k
- 1)) == 0)
3516 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3519 arginfo
[i
].op
= vec_oprnd0
;
3521 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3523 bitsize_int ((m
& (k
- 1)) * prec
));
3525 = gimple_build_assign (make_ssa_name (atype
),
3527 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3528 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3532 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3533 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3534 gcc_assert ((k
& (k
- 1)) == 0);
3535 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3537 vec_alloc (ctor_elts
, k
);
3540 for (l
= 0; l
< k
; l
++)
3542 if (m
== 0 && l
== 0)
3544 = vect_get_vec_def_for_operand (op
, stmt
);
3547 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3549 arginfo
[i
].op
= vec_oprnd0
;
3552 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3556 vargs
.safe_push (vec_oprnd0
);
3559 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3561 = gimple_build_assign (make_ssa_name (atype
),
3563 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3564 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3569 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3570 vargs
.safe_push (op
);
3572 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3573 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3578 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3583 edge pe
= loop_preheader_edge (loop
);
3584 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3585 gcc_assert (!new_bb
);
3587 if (arginfo
[i
].simd_lane_linear
)
3589 vargs
.safe_push (arginfo
[i
].op
);
3592 tree phi_res
= copy_ssa_name (op
);
3593 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3594 set_vinfo_for_stmt (new_phi
,
3595 new_stmt_vec_info (new_phi
, loop_vinfo
));
3596 add_phi_arg (new_phi
, arginfo
[i
].op
,
3597 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3599 = POINTER_TYPE_P (TREE_TYPE (op
))
3600 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3601 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3602 ? sizetype
: TREE_TYPE (op
);
3604 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3606 tree tcst
= wide_int_to_tree (type
, cst
);
3607 tree phi_arg
= copy_ssa_name (op
);
3609 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3610 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3611 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3612 set_vinfo_for_stmt (new_stmt
,
3613 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3614 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3616 arginfo
[i
].op
= phi_res
;
3617 vargs
.safe_push (phi_res
);
3622 = POINTER_TYPE_P (TREE_TYPE (op
))
3623 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3624 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3625 ? sizetype
: TREE_TYPE (op
);
3627 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3629 tree tcst
= wide_int_to_tree (type
, cst
);
3630 new_temp
= make_ssa_name (TREE_TYPE (op
));
3631 new_stmt
= gimple_build_assign (new_temp
, code
,
3632 arginfo
[i
].op
, tcst
);
3633 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3634 vargs
.safe_push (new_temp
);
3637 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3638 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3639 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3640 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3641 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3642 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3648 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3651 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3653 new_temp
= create_tmp_var (ratype
);
3654 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3655 == TYPE_VECTOR_SUBPARTS (rtype
))
3656 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3658 new_temp
= make_ssa_name (rtype
, new_stmt
);
3659 gimple_call_set_lhs (new_stmt
, new_temp
);
3661 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3665 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3668 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3669 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3670 gcc_assert ((k
& (k
- 1)) == 0);
3671 for (l
= 0; l
< k
; l
++)
3676 t
= build_fold_addr_expr (new_temp
);
3677 t
= build2 (MEM_REF
, vectype
, t
,
3678 build_int_cst (TREE_TYPE (t
),
3679 l
* prec
/ BITS_PER_UNIT
));
3682 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3683 size_int (prec
), bitsize_int (l
* prec
));
3685 = gimple_build_assign (make_ssa_name (vectype
), t
);
3686 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3687 if (j
== 0 && l
== 0)
3688 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3690 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3692 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3697 tree clobber
= build_constructor (ratype
, NULL
);
3698 TREE_THIS_VOLATILE (clobber
) = 1;
3699 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3700 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3704 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3706 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3707 / TYPE_VECTOR_SUBPARTS (rtype
));
3708 gcc_assert ((k
& (k
- 1)) == 0);
3709 if ((j
& (k
- 1)) == 0)
3710 vec_alloc (ret_ctor_elts
, k
);
3713 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3714 for (m
= 0; m
< o
; m
++)
3716 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3717 size_int (m
), NULL_TREE
, NULL_TREE
);
3719 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3720 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3721 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3722 gimple_assign_lhs (new_stmt
));
3724 tree clobber
= build_constructor (ratype
, NULL
);
3725 TREE_THIS_VOLATILE (clobber
) = 1;
3726 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3727 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3730 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3731 if ((j
& (k
- 1)) != k
- 1)
3733 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3735 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3736 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3738 if ((unsigned) j
== k
- 1)
3739 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3741 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3743 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3748 tree t
= build_fold_addr_expr (new_temp
);
3749 t
= build2 (MEM_REF
, vectype
, t
,
3750 build_int_cst (TREE_TYPE (t
), 0));
3752 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3753 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3754 tree clobber
= build_constructor (ratype
, NULL
);
3755 TREE_THIS_VOLATILE (clobber
) = 1;
3756 vect_finish_stmt_generation (stmt
,
3757 gimple_build_assign (new_temp
,
3763 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3765 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3767 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3772 /* The call in STMT might prevent it from being removed in dce.
3773 We however cannot remove it here, due to the way the ssa name
3774 it defines is mapped to the new definition. So just replace
3775 rhs of the statement with something harmless. */
3782 type
= TREE_TYPE (scalar_dest
);
3783 if (is_pattern_stmt_p (stmt_info
))
3784 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3786 lhs
= gimple_call_lhs (stmt
);
3787 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3790 new_stmt
= gimple_build_nop ();
3791 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3792 set_vinfo_for_stmt (stmt
, NULL
);
3793 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3794 gsi_replace (gsi
, new_stmt
, true);
3795 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
static gimple *
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
static void
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */
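/* Illustrative sketch only (not part of the original sources): demoting
   int to short with 128-bit vectors packs pairs of input vectors:

       v8hi_0 = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
       v8hi_1 = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;

   For a multi-step demotion (e.g. int -> char) the partial results are
   stored back into VEC_OPRNDS and the function recurses with
   VEC_PACK_TRUNC_EXPR until the destination type is reached.  */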
static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple *stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */
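/* Illustrative sketch only (not part of the original sources): widening
   short to int with 128-bit vectors produces two result vectors per
   input vector, one from each half:

       v4si_lo = VEC_UNPACK_LO_EXPR <v8hi_0>;
       v4si_hi = VEC_UNPACK_HI_EXPR <v8hi_0>;

   CODE1/CODE2 (or DECL1/DECL2 for target built-ins) generate the low
   and high halves, and the results are collected for the next step of
   a possibly multi-step conversion.  */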
3958 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3959 vec
<tree
> *vec_oprnds1
,
3960 gimple
*stmt
, tree vec_dest
,
3961 gimple_stmt_iterator
*gsi
,
3962 enum tree_code code1
,
3963 enum tree_code code2
, tree decl1
,
3964 tree decl2
, int op_type
)
3967 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3968 gimple
*new_stmt1
, *new_stmt2
;
3969 vec
<tree
> vec_tmp
= vNULL
;
3971 vec_tmp
.create (vec_oprnds0
->length () * 2);
3972 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3974 if (op_type
== binary_op
)
3975 vop1
= (*vec_oprnds1
)[i
];
3979 /* Generate the two halves of promotion operation. */
3980 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3981 op_type
, vec_dest
, gsi
, stmt
);
3982 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3983 op_type
, vec_dest
, gsi
, stmt
);
3984 if (is_gimple_call (new_stmt1
))
3986 new_tmp1
= gimple_call_lhs (new_stmt1
);
3987 new_tmp2
= gimple_call_lhs (new_stmt2
);
3991 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3992 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3995 /* Store the results for the next step. */
3996 vec_tmp
.quick_push (new_tmp1
);
3997 vec_tmp
.quick_push (new_tmp2
);
4000 vec_oprnds0
->release ();
4001 *vec_oprnds0
= vec_tmp
;
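
/* Illustrative sketch (not part of the vectorizer): a scalar model of one
   promotion step.  One vector of 2*N narrow elements is widened into a "lo"
   half and a "hi" half, which is why the code above pushes two results per
   input and ends up with twice as many vectors.  Names are invented.  */

static inline void
example_unpack_widen_step (const short *in, int *lo, int *hi, int n)
{
  for (int i = 0; i < n; i++)
    {
      lo[i] = (int) in[i];        /* low half of the input vector */
      hi[i] = (int) in[n + i];    /* high half of the input vector */
    }
}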
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
                         gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
           && (TYPE_PRECISION (lhs_type)
               != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
          || (INTEGRAL_TYPE_P (rhs_type)
              && (TYPE_PRECISION (rhs_type)
                  != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "can't convert between boolean and non "
                           "boolean vectors ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)                /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, codecvt1,
                                                      vop0);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest);
                    new_stmt = gimple_build_assign (new_temp, codecvt1,
                                                    vop0);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();

  return true;
}
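
/* Illustrative sketch (not part of the vectorizer): a scalar view of a
   multi-step widening conversion.  Converting short to double on a target
   that only widens one step at a time goes short -> int -> double, which is
   the kind of intermediate-type chain vectorizable_conversion records in
   INTERM_TYPES above.  Names are invented for the example.  */

static inline void
example_two_step_widen (const short *in, double *out, int n)
{
  for (int i = 0; i < n; i++)
    {
      int widened = (int) in[i];    /* first (integer) widening step */
      out[i] = (double) widened;    /* final int -> double conversion */
    }
}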
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
                         gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
         a simple assignment in case their vectypes are same
         boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
          || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
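
/* Illustrative sketch (not part of the vectorizer): the scalar analogue of
   the VIEW_CONVERT_EXPR copies handled above is a conversion that keeps the
   element count and the element width, so each vector statement is just a
   copy with a reinterpreted type.  Names are invented for the example.  */

static inline void
example_same_size_copy (const unsigned int *in, int *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (int) in[i];   /* same width, no change in element count */
}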
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
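
/* Illustrative sketch (not part of the vectorizer): the two shift flavours
   probed above correspond, in scalar terms, to shifting every element by one
   loop-invariant amount (optab_scalar) versus shifting each element by its
   own amount (optab_vector).  Function names are invented.  */

static inline void
example_shift_by_scalar (int *a, int n, int amount)
{
  for (int i = 0; i < n; i++)
    a[i] <<= amount;          /* one shift count for the whole vector */
}

static inline void
example_shift_by_vector (int *a, const int *amount, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= amount[i];       /* per-element shift counts */
}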
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
                    gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple *slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }

      /* If the shift amount is computed by a pattern stmt we cannot
         use the scalar amount directly thus give up and use a vector
         shift.  */
      if (dt[1] == vect_internal_def)
        {
          gimple *def = SSA_NAME_DEF_STMT (op1);
          if (is_pattern_stmt_p (vinfo_for_stmt (def)))
            scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.\n");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.\n");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.\n");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.\n");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.\n");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
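
/* Illustrative sketch (not part of the vectorizer): when the target's shift
   insn wants a vector shift count but the source amount is loop invariant,
   the amount is broadcast ("splat") into a vector once outside the loop,
   which is what the vect_init_vector call above models.  Names are
   invented for the example.  */

static inline void
example_splat_shift_count (int *amount_vec, int n, int amount)
{
  for (int i = 0; i < n; i++)
    amount_vec[i] = amount;   /* one invariant count replicated per lane */
}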
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
                        gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).\n",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
          != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
         invariant value (don't know whether it is a vector
         of booleans or vector of integers).  We use output
         vectype because operations on boolean don't change
         type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
        {
          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not supported operation on bool value.\n");
              return false;
            }
          vectype = vectype_out;
        }
      else
        vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      target_support_p = (optab_handler (optab, vec_mode)
                          != CODE_FOR_nothing);
    }

  if (!target_support_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
                               slp_node, -1);
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
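
/* Illustrative sketch (not part of the vectorizer): the "unrolling"
   described in the big comment above, in scalar terms.  With VF = 16 and 4
   elements per vector, one scalar statement becomes ncopies = 16 / 4 = 4
   vector statements per iteration of the vectorized loop.  Names are
   invented for the example.  */

static inline void
example_unrolled_add (int *z, const int *x, int n)
{
  /* Each group of four additions stands for one vector statement
     (VS2_0 .. VS2_3 in the comment above); four of them cover VF = 16
     scalar iterations.  */
  for (int i = 0; i + 16 <= n; i += 16)
    for (int copy = 0; copy < 4; copy++)
      for (int lane = 0; lane < 4; lane++)
        z[i + copy * 4 + lane] = x[i + copy * 4 + lane] + 1;
}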
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
      else
        {
          SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
          DECL_USER_ALIGN (base_decl) = 1;
        }
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}
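
/* Illustrative sketch (not part of the vectorizer): the effect of the
   helper above, expressed in plain C.  Raising the alignment of the
   underlying object lets later vector loads and stores use aligned
   accesses.  The 16-byte figure and all names are assumptions chosen for
   the example.  */

static inline int
example_aligned_sum (void)
{
  static int example_base[256] __attribute__ ((aligned (16)));
  int sum = 0;
  for (int i = 0; i < 256; i++)   /* eligible for aligned vector loads */
    sum += example_base[i];
  return sum;
}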
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT.  */

static tree
get_group_alias_ptr_type (gimple *first_stmt)
{
  struct data_reference *first_dr, *next_dr;
  gimple *next_stmt;

  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
  while (next_stmt)
    {
      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
      if (get_alias_set (DR_REF (first_dr))
          != get_alias_set (DR_REF (next_dr)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "conflicting alias set types.\n");
          return ptr_type_node;
        }
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  enum dr_alignment_support alignment_support_scheme;
  gimple *def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  gimple *next_stmt, *first_stmt;
  bool grouped_store;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree aggr_type;
  gather_scatter_info gs_info;
  enum vect_def_type scatter_src_dt = vect_unknown_def_type;
  gimple *new_stmt;
  int vf;
  vec_load_store_type vls_type;
  tree ref_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  /* Cannot have hybrid store SLP -- that would mean storing to the
     same location twice.  */
  gcc_assert (slp == PURE_SLP_STMT (stmt_info));

  gcc_assert (gimple_assign_single_p (stmt));

  tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  op = gimple_assign_rhs1 (stmt);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    vls_type = VLS_STORE_INVARIANT;
  else
    vls_type = VLS_STORE;

  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    return false;

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
                            &memory_access_type, &gs_info))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
                               NULL, NULL, NULL);
      return true;
    }
  gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  /* Transform.  */

  ensure_base_align (stmt_info, dr);
  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);

      if (nunits == (unsigned int) scatter_off_nunits)
        modifier = NONE;
      else if (nunits == (unsigned int) scatter_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == (unsigned int) scatter_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < (unsigned int) nunits; ++i)
            sel[i] = i | scatter_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);

      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
                           && TREE_CODE (rettype) == VOID_TYPE);

      ptr = fold_convert (ptrtype, gs_info.base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional scatter stores,
         so mask should be all ones.  */
      mask = build_int_cst (masktype, -1);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gs_info.scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (j == 0)
            {
              src = vec_oprnd1
                = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
              op = vec_oprnd0
                = vect_get_vec_def_for_operand (gs_info.offset, stmt);
            }
          else if (modifier != NONE && (j & 1))
            {
              if (modifier == WIDEN)
                {
                  src = vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (scatter_src_dt,
                                                      vec_oprnd1);
                  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
                                             stmt, gsi);
                }
              else if (modifier == NARROW)
                {
                  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
                                              stmt, gsi);
                  op = vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
                                                      vec_oprnd0);
                }
              else
                gcc_unreachable ();
            }
          else
            {
              src = vec_oprnd1
                = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
              op = vec_oprnd0
                = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
                                                  vec_oprnd0);
            }

          if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
                          == TYPE_VECTOR_SUBPARTS (srctype));
              var = vect_get_new_ssa_name (srctype, vect_simple_var);
              src = build1 (VIEW_CONVERT_EXPR, srctype, src);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              src = var;
            }

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
  if (grouped_store)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt))
                      == first_stmt);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt);
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      ref_type = reference_alias_ptr_type (DR_REF (first_dr));
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;
      unsigned int g;

      gcc_assert (!nested_in_vect_loop_p (loop, stmt));

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (first_dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
         */
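
      /* Illustrative sketch (not part of the vectorizer): the lowering
         described above written out as plain C, assuming VF == 4 and an
         invariant stride; all names below are invented for the example.

             for (j = 0; j + 3 * stride < n; j += 4 * stride)
               {
                 int vectemp[4];
                 compute_four_values (vectemp, j);   // the vectorized rhs
                 array[j + 0 * stride] = vectemp[0];
                 array[j + 1 * stride] = vectemp[1];
                 array[j + 2 * stride] = vectemp[2];
                 array[j + 3 * stride] = vectemp[3];
               }
      */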
5987 unsigned nstores
= nunits
;
5989 tree ltype
= elem_type
;
5992 if (group_size
< nunits
5993 && nunits
% group_size
== 0)
5995 nstores
= nunits
/ group_size
;
5997 ltype
= build_vector_type (elem_type
, group_size
);
5999 else if (group_size
>= nunits
6000 && group_size
% nunits
== 0)
6006 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6007 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6010 ivstep
= stride_step
;
6011 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6012 build_int_cst (TREE_TYPE (ivstep
), vf
));
6014 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6016 create_iv (stride_base
, ivstep
, NULL
,
6017 loop
, &incr_gsi
, insert_after
,
6019 incr
= gsi_stmt (incr_gsi
);
6020 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6022 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6024 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6026 prev_stmt_info
= NULL
;
6027 alias_off
= build_int_cst (ref_type
, 0);
6028 next_stmt
= first_stmt
;
6029 for (g
= 0; g
< group_size
; g
++)
6031 running_off
= offvar
;
6034 tree size
= TYPE_SIZE_UNIT (ltype
);
6035 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6037 tree newoff
= copy_ssa_name (running_off
, NULL
);
6038 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6040 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6041 running_off
= newoff
;
6043 unsigned int group_el
= 0;
6044 unsigned HOST_WIDE_INT
6045 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6046 for (j
= 0; j
< ncopies
; j
++)
6048 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6049 and first_stmt == stmt. */
6054 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6056 vec_oprnd
= vec_oprnds
[0];
6060 gcc_assert (gimple_assign_single_p (next_stmt
));
6061 op
= gimple_assign_rhs1 (next_stmt
);
6062 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6068 vec_oprnd
= vec_oprnds
[j
];
6071 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6072 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6076 for (i
= 0; i
< nstores
; i
++)
6078 tree newref
, newoff
;
6079 gimple
*incr
, *assign
;
6080 tree size
= TYPE_SIZE (ltype
);
6081 /* Extract the i'th component. */
6082 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6083 bitsize_int (i
), size
);
6084 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6087 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6091 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6093 newref
= build2 (MEM_REF
, ltype
,
6094 running_off
, this_off
);
6096 /* And store it to *running_off. */
6097 assign
= gimple_build_assign (newref
, elem
);
6098 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6102 || group_el
== group_size
)
6104 newoff
= copy_ssa_name (running_off
, NULL
);
6105 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6106 running_off
, stride_step
);
6107 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6109 running_off
= newoff
;
6112 if (g
== group_size
- 1
6115 if (j
== 0 && i
== 0)
6116 STMT_VINFO_VEC_STMT (stmt_info
)
6117 = *vec_stmt
= assign
;
6119 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6120 prev_stmt_info
= vinfo_for_stmt (assign
);
6124 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6129 vec_oprnds
.release ();
  auto_vec<tree> dr_chain (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  if (memory_access_type == VMAT_CONTIGUOUS_DOWN
      || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

     VS2: &base + vec_size*1 = vx0
     VS3: &base + vec_size*2 = vx1
     VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
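  /* As an illustration outside of GCC internals, the VS5/VS6 permutations
     above correspond to the following sketch using GCC's __builtin_shuffle
     on generic vectors; the types and names are only illustrative.

       typedef short v8hi __attribute__ ((vector_size (16)));

       // Two vectors of group data (a for one stream, b for the other)
       // are interleaved exactly like VS5/VS6 before being stored.
       void
       interleave_store (short *out, v8hi a, v8hi b)
       {
	 v8hi lo = __builtin_shuffle (a, b,
				      (v8hi) { 0, 8, 1, 9, 2, 10, 3, 11 });
	 v8hi hi = __builtin_shuffle (a, b,
				      (v8hi) { 4, 12, 5, 13, 6, 14, 7, 15 });
	 __builtin_memcpy (out, &lo, sizeof lo);       // &base + 0
	 __builtin_memcpy (out + 8, &hi, sizeof hi);   // &base + vec_size
       }  */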
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
	  /* Get vectorized arguments for SLP_NODE.  */
	  vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
			     NULL, slp_node, -1);
	  vec_oprnd = vec_oprnds[0];

	  /* For interleaved stores we collect vectorized defs for all the
	     stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
	     used as an input to vect_permute_store_chain(), and OPRNDS as
	     an input to vect_get_vec_def_for_stmt_copy() for the next copy.

	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  next_stmt = first_stmt;
	  for (i = 0; i < group_size; i++)
	      /* Since gaps are not supported for interleaved stores,
		 GROUP_SIZE is the exact number of stmts in the chain.
		 Therefore, NEXT_STMT can't be NULL_TREE.  In case that
		 there is no interleaving, GROUP_SIZE is 1, and only one
		 iteration of the loop will be executed.  */
	      gcc_assert (next_stmt
			  && gimple_assign_single_p (next_stmt));
	      op = gimple_assign_rhs1 (next_stmt);
	      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
	      dr_chain.quick_push (vec_oprnd);
	      oprnds.quick_push (vec_oprnd);
	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));

	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (TREE_TYPE (ref_type))))
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (ref_type, 0);
	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
					  simd_lane_access_p ? loop : NULL,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p);
	  gcc_assert (bb_vinfo || !inv_p);

	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
	     next copy.

	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  for (i = 0; i < group_size; i++)
	      vect_is_simple_use (op, vinfo, &def_stmt, &dt);
	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
	      dr_chain[i] = vec_oprnd;
	      oprnds[i] = vec_oprnd;
	    = int_const_binop (PLUS_EXPR, dataref_offset,
			       TYPE_SIZE_UNIT (aggr_type));
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					   TYPE_SIZE_UNIT (aggr_type));

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	  /* Combine all the vectors into an array.  */
	  vec_array = create_vector_array (vectype, vec_num);
	  for (i = 0; i < vec_num; i++)
	      vec_oprnd = dr_chain[i];
	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);

	     MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
	  gimple_call_set_lhs (new_stmt, data_ref);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  result_chain.create (group_size);
	  vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
	  next_stmt = first_stmt;
	  for (i = 0; i < vec_num; i++)
	      unsigned align, misalign;

		/* Bump the vector pointer.  */
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,

		vec_oprnd = vec_oprnds[i];
	      else if (grouped_store)
		/* For grouped stores vectorized defs are interleaved in
		   vect_permute_store_chain().  */
		vec_oprnd = result_chain[i];

	      data_ref = fold_build2 (MEM_REF, vectype,
				      : build_int_cst (ref_type, 0));
	      align = TYPE_ALIGN_UNIT (vectype);
	      if (aligned_access_p (first_dr))
	      else if (DR_MISALIGNMENT (first_dr) == -1)
		  if (DR_VECT_AUX (first_dr)->base_element_aligned)
		    align = TYPE_ALIGN_UNIT (elem_type);
		    align = get_object_alignment (DR_REF (first_dr))
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  align * BITS_PER_UNIT);
		  TREE_TYPE (data_ref)
		    = build_aligned_type (TREE_TYPE (data_ref),
					  TYPE_ALIGN (elem_type));
		  misalign = DR_MISALIGNMENT (first_dr);
	      if (dataref_offset == NULL_TREE
		  && TREE_CODE (dataref_ptr) == SSA_NAME)
		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,

	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
		  tree perm_mask = perm_mask_for_reverse (vectype);
		    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
		  tree new_temp = make_ssa_name (perm_dest);

		  /* Generate the permute statement.  */
		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
					   vec_oprnd, perm_mask);
		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
		  vec_oprnd = new_temp;

	      /* Arguments are ready.  Create the new vector stmt.  */
	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));

	  STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);

  result_chain.release ();
  vec_oprnds.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */

vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
  return vect_gen_perm_mask_any (vectype, sel);
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
		      gimple_stmt_iterator *gsi)
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
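/* A standalone sketch (not GCC internals) of what the VEC_PERM_EXPR built
   by the two helpers above expands to, written with GCC's generic vector
   extensions; all names are illustrative.  Mask values 0-3 select from X,
   4-7 select from Y, matching the SEL indices handed to
   vect_gen_perm_mask_any.

     typedef int v4si __attribute__ ((vector_size (16)));

     // Select elements of X and Y according to MASK, as
     // permute_vec_elements does for the vectorized statement.
     static v4si
     permute (v4si x, v4si y, v4si mask)
     {
       return __builtin_shuffle (x, y, mask);
     }

     // Example: reverse X by permuting it with itself, the same shape of
     // mask that perm_mask_for_reverse produces.
     static v4si
     reverse (v4si x)
     {
       return __builtin_shuffle (x, x, (v4si) { 3, 2, 1, 0 });
     }  */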
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

hoist_defs_of_uses (gimple *stmt, struct loop *loop)
  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
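/* Illustration (outside GCC) of the source-level effect hoist_defs_of_uses
   enables: statements that compute the operands of a loop-invariant load
   are moved to the preheader so the load itself can be hoisted too.  The
   functions and names below are made up for the example.

     // Before: the address computation and the load are invariant in i.
     void before (int *out, const int *table, int base, int offset, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = table[base + offset];
     }

     // After: the defs of the load's uses sit on the preheader, followed
     // by the invariant load itself.
     void after (int *out, const int *table, int base, int offset, int n)
     {
       int idx = base + offset;       // hoisted def of a use
       int val = table[idx];          // invariant load, now on the preheader
       for (int i = 0; i < n; i++)
	 out[i] = val;
     }  */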
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  gimple *new_stmt = NULL;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int i, j, group_size, group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  gimple *first_stmt_for_drptr = NULL;
  bool compute_in_loop = false;
  struct loop *at_loop;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gather_scatter_info gs_info;
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && TREE_CODE_CLASS (code) != tcc_declaration)

  if (!STMT_VINFO_DATA_REF (stmt_info))

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);

      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	  > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot perform implicit CSE when unrolling "
			 "with negative dependence distance\n");

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
      grouped_load = true;
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())

      /* Invalidate assumptions made by dependence analysis when vectorization
	 on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
	  && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	      > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot perform implicit CSE when performing "
			     "group loads with negative dependence distance\n");

      /* Similarly when the stmt is a load that is both part of a SLP
	 instance and a loop vectorized stmt via the same-dr mechanism
	 we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
	  && (STMT_SLP_TYPE (stmt_info)
	      != STMT_SLP_TYPE (vinfo_for_stmt
				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "conflicting SLP types for CSEd load\n");

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
			    &memory_access_type, &gs_info))

  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
	vect_model_load_cost (stmt_info, ncopies, memory_access_type,

    gcc_assert (memory_access_type
		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform load. ncopies = %d\n", ncopies);

  ensure_base_align (stmt_info, dr);
6740 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6742 tree vec_oprnd0
= NULL_TREE
, op
;
6743 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6744 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6745 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6746 edge pe
= loop_preheader_edge (loop
);
6749 enum { NARROW
, NONE
, WIDEN
} modifier
;
6750 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6752 if (nunits
== gather_off_nunits
)
6754 else if (nunits
== gather_off_nunits
/ 2)
6756 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6759 for (i
= 0; i
< gather_off_nunits
; ++i
)
6760 sel
[i
] = i
| nunits
;
6762 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6764 else if (nunits
== gather_off_nunits
* 2)
6766 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6769 for (i
= 0; i
< nunits
; ++i
)
6770 sel
[i
] = i
< gather_off_nunits
6771 ? i
: i
+ nunits
- gather_off_nunits
;
6773 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6779 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6780 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6781 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6782 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6783 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6784 scaletype
= TREE_VALUE (arglist
);
6785 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6787 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6789 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6790 if (!is_gimple_min_invariant (ptr
))
6792 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6793 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6794 gcc_assert (!new_bb
);
6797 /* Currently we support only unconditional gather loads,
6798 so mask should be all ones. */
6799 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6800 mask
= build_int_cst (masktype
, -1);
6801 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6803 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6804 mask
= build_vector_from_val (masktype
, mask
);
6805 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6807 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6811 for (j
= 0; j
< 6; ++j
)
6813 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6814 mask
= build_real (TREE_TYPE (masktype
), r
);
6815 mask
= build_vector_from_val (masktype
, mask
);
6816 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6821 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6823 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6824 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6825 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6829 for (j
= 0; j
< 6; ++j
)
6831 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6832 merge
= build_real (TREE_TYPE (rettype
), r
);
6836 merge
= build_vector_from_val (rettype
, merge
);
6837 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6839 prev_stmt_info
= NULL
;
6840 for (j
= 0; j
< ncopies
; ++j
)
6842 if (modifier
== WIDEN
&& (j
& 1))
6843 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6844 perm_mask
, stmt
, gsi
);
6847 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6850 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6852 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6854 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6855 == TYPE_VECTOR_SUBPARTS (idxtype
));
6856 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6857 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6859 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6860 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6865 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6867 if (!useless_type_conversion_p (vectype
, rettype
))
6869 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6870 == TYPE_VECTOR_SUBPARTS (rettype
));
6871 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6872 gimple_call_set_lhs (new_stmt
, op
);
6873 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6874 var
= make_ssa_name (vec_dest
);
6875 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6877 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6881 var
= make_ssa_name (vec_dest
, new_stmt
);
6882 gimple_call_set_lhs (new_stmt
, var
);
6885 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6887 if (modifier
== NARROW
)
6894 var
= permute_vec_elements (prev_res
, var
,
6895 perm_mask
, stmt
, gsi
);
6896 new_stmt
= SSA_NAME_DEF_STMT (var
);
6899 if (prev_stmt_info
== NULL
)
6900 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6902 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6903 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6908 if (memory_access_type
== VMAT_ELEMENTWISE
6909 || memory_access_type
== VMAT_STRIDED_SLP
)
6911 gimple_stmt_iterator incr_gsi
;
6917 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6918 gimple_seq stmts
= NULL
;
6919 tree stride_base
, stride_step
, alias_off
;
6921 gcc_assert (!nested_in_vect_loop
);
6923 if (slp
&& grouped_load
)
6925 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6926 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6927 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6928 ref_type
= get_group_alias_ptr_type (first_stmt
);
6935 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6939 = fold_build_pointer_plus
6940 (DR_BASE_ADDRESS (first_dr
),
6941 size_binop (PLUS_EXPR
,
6942 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6943 convert_to_ptrofftype (DR_INIT (first_dr
))));
6944 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp2 = array[j + stride];
	     vectemp = {tmp1, tmp2, ...}  */
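      /* A standalone sketch (not GCC internals) of the lowering described
	 above, using GCC's generic vector extensions; all names are made up
	 for the example.

	   #include <stddef.h>

	   typedef int v4si __attribute__ ((vector_size (16)));

	   // Scalar original:  for (i = 0; i < n; i++) out[i] = in[i * stride];
	   // The vectorized body loads four strided elements and builds a
	   // vector from them, matching "vectemp = {tmp1, tmp2, ...}".
	   void
	   strided_load (int *out, const int *in, size_t n, size_t stride)
	   {
	     size_t i = 0;
	     for (; i + 4 <= n; i += 4)
	       {
		 v4si tmp = { in[(i + 0) * stride], in[(i + 1) * stride],
			      in[(i + 2) * stride], in[(i + 3) * stride] };
		 __builtin_memcpy (&out[i], &tmp, sizeof tmp);
	       }
	     for (; i < n; i++)                  // scalar epilogue
	       out[i] = in[i * stride];
	   }  */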
6962 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6963 build_int_cst (TREE_TYPE (stride_step
), vf
));
6965 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6967 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6968 loop
, &incr_gsi
, insert_after
,
6970 incr
= gsi_stmt (incr_gsi
);
6971 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6973 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6974 &stmts
, true, NULL_TREE
);
6976 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6978 prev_stmt_info
= NULL
;
6979 running_off
= offvar
;
6980 alias_off
= build_int_cst (ref_type
, 0);
6981 int nloads
= nunits
;
6983 tree ltype
= TREE_TYPE (vectype
);
6984 tree lvectype
= vectype
;
6985 auto_vec
<tree
> dr_chain
;
6986 if (memory_access_type
== VMAT_STRIDED_SLP
)
6988 if (group_size
< nunits
)
6990 /* Avoid emitting a constructor of vector elements by performing
6991 the loads using an integer type of the same size,
6992 constructing a vector of those and then re-interpreting it
6993 as the original vector type. This works around the fact
6994 that the vec_init optab was only designed for scalar
6995 element modes and thus expansion goes through memory.
6996 This avoids a huge runtime penalty due to the general
6997 inability to perform store forwarding from smaller stores
6998 to a larger load. */
7000 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7001 enum machine_mode elmode
= mode_for_size (lsize
, MODE_INT
, 0);
7002 enum machine_mode vmode
= mode_for_vector (elmode
,
7003 nunits
/ group_size
);
7004 /* If we can't construct such a vector fall back to
7005 element loads of the original vector type. */
7006 if (VECTOR_MODE_P (vmode
)
7007 && optab_handler (vec_init_optab
, vmode
) != CODE_FOR_nothing
)
7009 nloads
= nunits
/ group_size
;
7011 ltype
= build_nonstandard_integer_type (lsize
, 1);
7012 lvectype
= build_vector_type (ltype
, nloads
);
7021 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7025 /* For SLP permutation support we need to load the whole group,
7026 not only the number of vector stmts the permutation result
7030 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7031 dr_chain
.create (ncopies
);
7034 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7037 unsigned HOST_WIDE_INT
7038 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7039 for (j
= 0; j
< ncopies
; j
++)
7042 vec_alloc (v
, nloads
);
7043 for (i
= 0; i
< nloads
; i
++)
7045 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7047 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7048 build2 (MEM_REF
, ltype
,
7049 running_off
, this_off
));
7050 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7052 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7053 gimple_assign_lhs (new_stmt
));
7057 || group_el
== group_size
)
7059 tree newoff
= copy_ssa_name (running_off
);
7060 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7061 running_off
, stride_step
);
7062 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7064 running_off
= newoff
;
7070 tree vec_inv
= build_constructor (lvectype
, v
);
7071 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7072 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7073 if (lvectype
!= vectype
)
7075 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7077 build1 (VIEW_CONVERT_EXPR
,
7078 vectype
, new_temp
));
7079 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7086 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7088 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7093 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7095 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7096 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7102 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7103 slp_node_instance
, false, &n_perms
);
7110 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7111 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7112 /* For SLP vectorization we directly vectorize a subchain
7113 without permutation. */
7114 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7115 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7116 /* For BB vectorization always use the first stmt to base
7117 the data ref pointer on. */
7119 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7121 /* Check if the chain of loads is already vectorized. */
7122 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7123 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7124 ??? But we can only do so if there is exactly one
7125 as we have no way to get at the rest. Leave the CSE
7127 ??? With the group load eventually participating
7128 in multiple different permutations (having multiple
7129 slp nodes which refer to the same group) the CSE
7130 is even wrong code. See PR56270. */
7133 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7136 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7139 /* VEC_NUM is the number of vect stmts to be created for this group. */
7142 grouped_load
= false;
7143 /* For SLP permutation support we need to load the whole group,
7144 not only the number of vector stmts the permutation result
7147 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7149 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7150 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7153 vec_num
= group_size
;
7155 ref_type
= get_group_alias_ptr_type (first_stmt
);
7161 group_size
= vec_num
= 1;
7163 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7166 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7167 gcc_assert (alignment_support_scheme
);
7168 /* Targets with load-lane instructions must not require explicit
7170 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7171 || alignment_support_scheme
== dr_aligned
7172 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
				RELATED_STMT	VEC_STMT

     step 1: vectorize stmt S1:
	We first create the vector stmt VS1_0, and, as usual, record a
	pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
	Next, we create the vector stmt VS1_1, and record a pointer to
	it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
	Similarly, for VS1_2 and VS1_3.  This is the resulting chain of

				RELATED_STMT	VEC_STMT
	VS1_0:  vx0 = memref0	VS1_1		-
	VS1_1:  vx1 = memref1	VS1_2		-
	VS1_2:  vx2 = memref2	VS1_3		-
	VS1_3:  vx3 = memref3	-		-
	S1:     x = load	-		VS1_0

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
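  /* A worked sketch (outside GCC) of the VF/nunits unrolling above for
     VF = 16 and nunits = 4: one scalar load statement becomes four vector
     loads, which the vectorizer chains through RELATED_STMT.  Written with
     GCC's generic vectors; names are illustrative.

       typedef int v4si __attribute__ ((vector_size (16)));

       void
       copy16 (int *restrict dst, const int *restrict src)
       {
	 // ncopies = VF / nunits = 16 / 4 = 4 vector statements.
	 v4si vx0, vx1, vx2, vx3;
	 __builtin_memcpy (&vx0, src +  0, sizeof vx0);   // VS1_0
	 __builtin_memcpy (&vx1, src +  4, sizeof vx1);   // VS1_1
	 __builtin_memcpy (&vx2, src +  8, sizeof vx2);   // VS1_2
	 __builtin_memcpy (&vx3, src + 12, sizeof vx3);   // VS1_3
	 __builtin_memcpy (dst +  0, &vx0, sizeof vx0);
	 __builtin_memcpy (dst +  4, &vx1, sizeof vx1);
	 __builtin_memcpy (dst +  8, &vx2, sizeof vx2);
	 __builtin_memcpy (dst + 12, &vx3, sizeof vx3);
       }  */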
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = p + indx * vectype_size;

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       p2 = p2 + indx * vectype_size
       vec_dest = realign_load (msq, lsq, realignment_token)  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
7277 if (nested_in_vect_loop
7278 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7279 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7281 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7282 compute_in_loop
= true;
7285 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7286 || alignment_support_scheme
== dr_explicit_realign
)
7287 && !compute_in_loop
)
7289 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7290 alignment_support_scheme
, NULL_TREE
,
7292 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7294 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7295 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7302 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7303 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7305 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7306 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7308 aggr_type
= vectype
;
7310 prev_stmt_info
= NULL
;
7311 for (j
= 0; j
< ncopies
; j
++)
7313 /* 1. Create the vector or array pointer update chain. */
7316 bool simd_lane_access_p
7317 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7318 if (simd_lane_access_p
7319 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7320 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7321 && integer_zerop (DR_OFFSET (first_dr
))
7322 && integer_zerop (DR_INIT (first_dr
))
7323 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7324 get_alias_set (TREE_TYPE (ref_type
)))
7325 && (alignment_support_scheme
== dr_aligned
7326 || alignment_support_scheme
== dr_unaligned_supported
))
7328 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7329 dataref_offset
= build_int_cst (ref_type
, 0);
7332 else if (first_stmt_for_drptr
7333 && first_stmt
!= first_stmt_for_drptr
)
7336 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7337 at_loop
, offset
, &dummy
, gsi
,
7338 &ptr_incr
, simd_lane_access_p
,
7339 &inv_p
, byte_offset
);
7340 /* Adjust the pointer by the difference to first_stmt. */
7341 data_reference_p ptrdr
7342 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7343 tree diff
= fold_convert (sizetype
,
7344 size_binop (MINUS_EXPR
,
7347 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7352 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7353 offset
, &dummy
, gsi
, &ptr_incr
,
7354 simd_lane_access_p
, &inv_p
,
7357 else if (dataref_offset
)
7358 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7359 TYPE_SIZE_UNIT (aggr_type
));
7361 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7362 TYPE_SIZE_UNIT (aggr_type
));
7364 if (grouped_load
|| slp_perm
)
7365 dr_chain
.create (vec_num
);
7367 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7371 vec_array
= create_vector_array (vectype
, vec_num
);
7374 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7375 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7376 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7377 gimple_call_set_lhs (new_stmt
, vec_array
);
7378 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7380 /* Extract each vector into an SSA_NAME. */
7381 for (i
= 0; i
< vec_num
; i
++)
7383 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7385 dr_chain
.quick_push (new_temp
);
7388 /* Record the mapping between SSA_NAMEs and statements. */
7389 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7393 for (i
= 0; i
< vec_num
; i
++)
7396 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7399 /* 2. Create the vector-load in the loop. */
7400 switch (alignment_support_scheme
)
7403 case dr_unaligned_supported
:
7405 unsigned int align
, misalign
;
7408 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7411 : build_int_cst (ref_type
, 0));
7412 align
= TYPE_ALIGN_UNIT (vectype
);
7413 if (alignment_support_scheme
== dr_aligned
)
7415 gcc_assert (aligned_access_p (first_dr
));
7418 else if (DR_MISALIGNMENT (first_dr
) == -1)
7420 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7421 align
= TYPE_ALIGN_UNIT (elem_type
);
7423 align
= (get_object_alignment (DR_REF (first_dr
))
7426 TREE_TYPE (data_ref
)
7427 = build_aligned_type (TREE_TYPE (data_ref
),
7428 align
* BITS_PER_UNIT
);
7432 TREE_TYPE (data_ref
)
7433 = build_aligned_type (TREE_TYPE (data_ref
),
7434 TYPE_ALIGN (elem_type
));
7435 misalign
= DR_MISALIGNMENT (first_dr
);
7437 if (dataref_offset
== NULL_TREE
7438 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7439 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7443 case dr_explicit_realign
:
7447 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7449 if (compute_in_loop
)
7450 msq
= vect_setup_realignment (first_stmt
, gsi
,
7452 dr_explicit_realign
,
7455 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7456 ptr
= copy_ssa_name (dataref_ptr
);
7458 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7459 new_stmt
= gimple_build_assign
7460 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7462 (TREE_TYPE (dataref_ptr
),
7463 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7464 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7466 = build2 (MEM_REF
, vectype
, ptr
,
7467 build_int_cst (ref_type
, 0));
7468 vec_dest
= vect_create_destination_var (scalar_dest
,
7470 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7471 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7472 gimple_assign_set_lhs (new_stmt
, new_temp
);
7473 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7474 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7475 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7478 bump
= size_binop (MULT_EXPR
, vs
,
7479 TYPE_SIZE_UNIT (elem_type
));
7480 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7481 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7482 new_stmt
= gimple_build_assign
7483 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7486 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7487 ptr
= copy_ssa_name (ptr
, new_stmt
);
7488 gimple_assign_set_lhs (new_stmt
, ptr
);
7489 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7491 = build2 (MEM_REF
, vectype
, ptr
,
7492 build_int_cst (ref_type
, 0));
7495 case dr_explicit_realign_optimized
:
7496 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7497 new_temp
= copy_ssa_name (dataref_ptr
);
7499 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7500 new_stmt
= gimple_build_assign
7501 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7503 (TREE_TYPE (dataref_ptr
),
7504 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7505 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7507 = build2 (MEM_REF
, vectype
, new_temp
,
7508 build_int_cst (ref_type
, 0));
7513 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7514 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7515 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7516 gimple_assign_set_lhs (new_stmt
, new_temp
);
7517 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7519 /* 3. Handle explicit realignment if necessary/supported.
7521 vec_dest = realign_load (msq, lsq, realignment_token) */
7522 if (alignment_support_scheme
== dr_explicit_realign_optimized
7523 || alignment_support_scheme
== dr_explicit_realign
)
7525 lsq
= gimple_assign_lhs (new_stmt
);
7526 if (!realignment_token
)
7527 realignment_token
= dataref_ptr
;
7528 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7529 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7530 msq
, lsq
, realignment_token
);
7531 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7532 gimple_assign_set_lhs (new_stmt
, new_temp
);
7533 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7535 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7538 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7539 add_phi_arg (phi
, lsq
,
7540 loop_latch_edge (containing_loop
),
7546 /* 4. Handle invariant-load. */
7547 if (inv_p
&& !bb_vinfo
)
7549 gcc_assert (!grouped_load
);
7550 /* If we have versioned for aliasing or the loop doesn't
7551 have any data dependencies that would preclude this,
7552 then we are sure this is a loop invariant load and
7553 thus we can insert it on the preheader edge. */
7554 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7555 && !nested_in_vect_loop
7556 && hoist_defs_of_uses (stmt
, loop
))
7558 if (dump_enabled_p ())
7560 dump_printf_loc (MSG_NOTE
, vect_location
,
7561 "hoisting out of the vectorized "
7563 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7565 tree tem
= copy_ssa_name (scalar_dest
);
7566 gsi_insert_on_edge_immediate
7567 (loop_preheader_edge (loop
),
7568 gimple_build_assign (tem
,
7570 (gimple_assign_rhs1 (stmt
))));
7571 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7572 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7573 set_vinfo_for_stmt (new_stmt
,
7574 new_stmt_vec_info (new_stmt
, vinfo
));
7578 gimple_stmt_iterator gsi2
= *gsi
;
7580 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7582 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7586 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7588 tree perm_mask
= perm_mask_for_reverse (vectype
);
7589 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7590 perm_mask
, stmt
, gsi
);
7591 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7594 /* Collect vector loads and later create their permutation in
7595 vect_transform_grouped_load (). */
7596 if (grouped_load
|| slp_perm
)
7597 dr_chain
.quick_push (new_temp
);
7599 /* Store vector loads in the corresponding SLP_NODE. */
7600 if (slp
&& !slp_perm
)
7601 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7603 /* Bump the vector pointer to account for a gap or for excess
7604 elements loaded for a permuted SLP load. */
7605 if (group_gap_adj
!= 0)
7609 = wide_int_to_tree (sizetype
,
7610 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7611 group_gap_adj
, &ovf
));
7612 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7617 if (slp
&& !slp_perm
)
7623 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7624 slp_node_instance
, false,
7627 dr_chain
.release ();
7635 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7636 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7637 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7642 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7644 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7645 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7648 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
7667 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7670 enum vect_def_type dt
;
7671 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7674 if (TREE_CODE (cond
) == SSA_NAME
7675 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7677 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7678 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7681 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7686 if (!COMPARISON_CLASS_P (cond
))
7689 lhs
= TREE_OPERAND (cond
, 0);
7690 rhs
= TREE_OPERAND (cond
, 1);
7692 if (TREE_CODE (lhs
) == SSA_NAME
)
7694 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7695 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7698 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7699 && TREE_CODE (lhs
) != FIXED_CST
)
7702 if (TREE_CODE (rhs
) == SSA_NAME
)
7704 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7705 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7708 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7709 && TREE_CODE (rhs
) != FIXED_CST
)
7712 if (vectype1
&& vectype2
7713 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7716 *comp_vectype
= vectype1
? vectype1
: vectype2
;
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
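/* Sketch (outside GCC) of how a scalar COND_EXPR maps onto a vector
   comparison plus a VEC_COND_EXPR; the bitwise blend below mirrors the
   mask semantics used in this function.  GCC generic vectors; all names
   are illustrative.

     typedef int v4si __attribute__ ((vector_size (16)));

     // Scalar:  x[i] = a[i] < b[i] ? c[i] : d[i];
     static v4si
     vcond (v4si a, v4si b, v4si c, v4si d)
     {
       v4si mask = a < b;                  // lanes become all-ones or zero
       return (mask & c) | (~mask & d);    // VEC_COND_EXPR as a blend
     }  */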
7734 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7735 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7738 tree scalar_dest
= NULL_TREE
;
7739 tree vec_dest
= NULL_TREE
;
7740 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7741 tree then_clause
, else_clause
;
7742 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7743 tree comp_vectype
= NULL_TREE
;
7744 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7745 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7748 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7749 enum vect_def_type dt
, dts
[4];
7751 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7752 stmt_vec_info prev_stmt_info
= NULL
;
7754 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7755 vec
<tree
> vec_oprnds0
= vNULL
;
7756 vec
<tree
> vec_oprnds1
= vNULL
;
7757 vec
<tree
> vec_oprnds2
= vNULL
;
7758 vec
<tree
> vec_oprnds3
= vNULL
;
7760 bool masked
= false;
7762 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7765 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7767 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7770 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7771 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7775 /* FORNOW: not yet supported. */
7776 if (STMT_VINFO_LIVE_P (stmt_info
))
7778 if (dump_enabled_p ())
7779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7780 "value used after loop.\n");
7785 /* Is vectorizable conditional operation? */
7786 if (!is_gimple_assign (stmt
))
7789 code
= gimple_assign_rhs_code (stmt
);
7791 if (code
!= COND_EXPR
)
7794 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7795 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7796 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7801 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7803 gcc_assert (ncopies
>= 1);
7804 if (reduc_index
&& ncopies
> 1)
7805 return false; /* FORNOW */
7807 cond_expr
= gimple_assign_rhs1 (stmt
);
7808 then_clause
= gimple_assign_rhs2 (stmt
);
7809 else_clause
= gimple_assign_rhs3 (stmt
);
7811 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7816 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7819 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7823 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7826 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7829 masked
= !COMPARISON_CLASS_P (cond_expr
);
7830 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7832 if (vec_cmp_type
== NULL_TREE
)
7835 cond_code
= TREE_CODE (cond_expr
);
7838 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
7839 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
7842 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7844 /* Boolean values may have another representation in vectors
7845 and therefore we prefer bit operations over comparison for
7846 them (which also works for scalar masks). We store opcodes
7847 to use in bitop1 and bitop2. Statement is vectorized as
7848 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7849 depending on bitop1 and bitop2 arity. */
7853 bitop1
= BIT_NOT_EXPR
;
7854 bitop2
= BIT_AND_EXPR
;
7857 bitop1
= BIT_NOT_EXPR
;
7858 bitop2
= BIT_IOR_EXPR
;
7861 bitop1
= BIT_NOT_EXPR
;
7862 bitop2
= BIT_AND_EXPR
;
7863 std::swap (cond_expr0
, cond_expr1
);
7866 bitop1
= BIT_NOT_EXPR
;
7867 bitop2
= BIT_IOR_EXPR
;
7868 std::swap (cond_expr0
, cond_expr1
);
7871 bitop1
= BIT_XOR_EXPR
;
7874 bitop1
= BIT_XOR_EXPR
;
7875 bitop2
= BIT_NOT_EXPR
;
7880 cond_code
= SSA_NAME
;
7885 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7886 if (bitop1
!= NOP_EXPR
)
7888 machine_mode mode
= TYPE_MODE (comp_vectype
);
7891 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
7892 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7895 if (bitop2
!= NOP_EXPR
)
7897 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
7899 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7903 return expand_vec_cond_expr_p (vectype
, comp_vectype
,
7911 vec_oprnds0
.create (1);
7912 vec_oprnds1
.create (1);
7913 vec_oprnds2
.create (1);
7914 vec_oprnds3
.create (1);
7918 scalar_dest
= gimple_assign_lhs (stmt
);
7919 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7921 /* Handle cond expr. */
7922 for (j
= 0; j
< ncopies
; j
++)
7924 gassign
*new_stmt
= NULL
;
7929 auto_vec
<tree
, 4> ops
;
7930 auto_vec
<vec
<tree
>, 4> vec_defs
;
7933 ops
.safe_push (cond_expr
);
7936 ops
.safe_push (cond_expr0
);
7937 ops
.safe_push (cond_expr1
);
7939 ops
.safe_push (then_clause
);
7940 ops
.safe_push (else_clause
);
7941 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7942 vec_oprnds3
= vec_defs
.pop ();
7943 vec_oprnds2
= vec_defs
.pop ();
7945 vec_oprnds1
= vec_defs
.pop ();
7946 vec_oprnds0
= vec_defs
.pop ();
7954 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7956 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7962 = vect_get_vec_def_for_operand (cond_expr0
,
7963 stmt
, comp_vectype
);
7964 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
7967 = vect_get_vec_def_for_operand (cond_expr1
,
7968 stmt
, comp_vectype
);
7969 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
7971 if (reduc_index
== 1)
7972 vec_then_clause
= reduc_def
;
7975 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7977 vect_is_simple_use (then_clause
, loop_vinfo
,
7980 if (reduc_index
== 2)
7981 vec_else_clause
= reduc_def
;
7984 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7986 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7993 = vect_get_vec_def_for_stmt_copy (dts
[0],
7994 vec_oprnds0
.pop ());
7997 = vect_get_vec_def_for_stmt_copy (dts
[1],
7998 vec_oprnds1
.pop ());
8000 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8001 vec_oprnds2
.pop ());
8002 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8003 vec_oprnds3
.pop ());
8008 vec_oprnds0
.quick_push (vec_cond_lhs
);
8010 vec_oprnds1
.quick_push (vec_cond_rhs
);
8011 vec_oprnds2
.quick_push (vec_then_clause
);
8012 vec_oprnds3
.quick_push (vec_else_clause
);
8015 /* Arguments are ready. Create the new vector stmt. */
8016 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8018 vec_then_clause
= vec_oprnds2
[i
];
8019 vec_else_clause
= vec_oprnds3
[i
];
8022 vec_compare
= vec_cond_lhs
;
8025 vec_cond_rhs
= vec_oprnds1
[i
];
8026 if (bitop1
== NOP_EXPR
)
8027 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8028 vec_cond_lhs
, vec_cond_rhs
);
8031 new_temp
= make_ssa_name (vec_cmp_type
);
8032 if (bitop1
== BIT_NOT_EXPR
)
8033 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8037 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8039 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8040 if (bitop2
== NOP_EXPR
)
8041 vec_compare
= new_temp
;
8042 else if (bitop2
== BIT_NOT_EXPR
)
8044 /* Instead of doing ~x ? y : z do x ? z : y. */
8045 vec_compare
= new_temp
;
8046 std::swap (vec_then_clause
, vec_else_clause
);
8050 vec_compare
= make_ssa_name (vec_cmp_type
);
8052 = gimple_build_assign (vec_compare
, bitop2
,
8053 vec_cond_lhs
, new_temp
);
8054 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8058 new_temp
= make_ssa_name (vec_dest
);
8059 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8060 vec_compare
, vec_then_clause
,
8062 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8064 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8071 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8073 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8075 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8078 vec_oprnds0
.release ();
8079 vec_oprnds1
.release ();
8080 vec_oprnds2
.release ();
8081 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
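/* Sketch (outside GCC) of a vectorized comparison producing a mask vector,
   and of the bit-operation rewriting this function applies when the
   operands are themselves masks: for mask (0/-1) lanes, a > b is computed
   as a & ~b instead of a real comparison.  GCC generic vectors; names are
   illustrative.

     typedef int v4si __attribute__ ((vector_size (16)));

     static v4si
     vcmp_gt (v4si a, v4si b)
     {
       return a > b;                 // each lane becomes 0 or -1 (a mask)
     }

     // With mask operands, GT is rewritten as BIT_NOT + BIT_AND:
     static v4si
     mask_gt (v4si ma, v4si mb)
     {
       return ma & ~mb;              // true only where ma is set and mb is clear
     }  */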
8095 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8096 gimple
**vec_stmt
, tree reduc_def
,
8099 tree lhs
, rhs1
, rhs2
;
8100 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8101 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8102 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8103 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8105 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8106 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8109 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8110 stmt_vec_info prev_stmt_info
= NULL
;
8112 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8113 vec
<tree
> vec_oprnds0
= vNULL
;
8114 vec
<tree
> vec_oprnds1
= vNULL
;
8119 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8122 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8125 mask_type
= vectype
;
8126 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8131 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
8133 gcc_assert (ncopies
>= 1);
8134 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8135 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8139 if (STMT_VINFO_LIVE_P (stmt_info
))
8141 if (dump_enabled_p ())
8142 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8143 "value used after loop.\n");
8147 if (!is_gimple_assign (stmt
))
8150 code
= gimple_assign_rhs_code (stmt
);
8152 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8155 rhs1
= gimple_assign_rhs1 (stmt
);
8156 rhs2
= gimple_assign_rhs2 (stmt
);
8158 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8159 &dts
[0], &vectype1
))
8162 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8163 &dts
[1], &vectype2
))
8166 if (vectype1
&& vectype2
8167 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
8170 vectype
= vectype1
? vectype1
: vectype2
;
8172 /* Invariant comparison. */
8175 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8176 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8179 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8182 /* Can't compare mask and non-mask types. */
8183 if (vectype1
&& vectype2
8184 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8187 /* Boolean values may have another representation in vectors
8188 and therefore we prefer bit operations over comparison for
8189 them (which also works for scalar masks). We store opcodes
8190 to use in bitop1 and bitop2. Statement is vectorized as
8191 BITOP2 (rhs1 BITOP1 rhs2) or
8192 rhs1 BITOP2 (BITOP1 rhs2)
8193 depending on bitop1 and bitop2 arity. */
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
      if (code == GT_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
      else if (code == GE_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
      else if (code == LT_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
          std::swap (rhs1, rhs2);
          std::swap (dts[0], dts[1]);
      else if (code == LE_EXPR)
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
          std::swap (rhs1, rhs2);
          std::swap (dts[0], dts[1]);
          bitop1 = BIT_XOR_EXPR;
          if (code == EQ_EXPR)
            bitop2 = BIT_NOT_EXPR;

      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
      if (bitop1 == NOP_EXPR)
        return expand_vec_cmp_expr_p (vectype, mask_type, code);
          machine_mode mode = TYPE_MODE (vectype);
          optab = optab_for_tree_code (bitop1, vectype, optab_default);
          if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
          if (bitop2 != NOP_EXPR)
              optab = optab_for_tree_code (bitop2, vectype, optab_default);
              if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)

      vec_oprnds0.create (1);
      vec_oprnds1.create (1);

  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
      gassign *new_stmt = NULL;
              auto_vec<tree, 2> ops;
              auto_vec<vec<tree>, 2> vec_defs;

              ops.safe_push (rhs1);
              ops.safe_push (rhs2);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
              vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
              vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
          vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
                                                     vec_oprnds0.pop ());
          vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
                                                     vec_oprnds1.pop ());
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
          vec_rhs2 = vec_oprnds1[i];

          new_temp = make_ssa_name (mask);
          if (bitop1 == NOP_EXPR)
              new_stmt = gimple_build_assign (new_temp, code,
                                              vec_rhs1, vec_rhs2);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (bitop1 == BIT_NOT_EXPR)
                new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
                new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (bitop2 != NOP_EXPR)
                  tree res = make_ssa_name (mask);
                  if (bitop2 == BIT_NOT_EXPR)
                    new_stmt = gimple_build_assign (res, bitop2, new_temp);
                    new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);

  vec_oprnds0.release ();
  vec_oprnds1.release ();
/* Make sure the statement is vectorizable.  */

vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  tree scalar_type, vectype;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);

  if (gimple_has_volatile_ops (stmt))
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, so don't analyze pattern stmts here; the pattern stmts will
     already be part of the SLP instance.  */
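/* As an illustration (hypothetical statement names): if pattern recognition
   replaced

       S1: t1 = (int) a;
       S2: t2 = (int) b;
       S3: prod = t1 * t2;

   by a pattern statement "prod' = WIDEN_MULT_EXPR <a, b>", then when S3 is
   not relevant by itself the pattern statement is the one analyzed below,
   and the original statements are reported as irrelevant.  */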
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))

  if (is_pattern_stmt_p (stmt_info)
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
          gimple *pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    case vect_internal_def:

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
                  && (relevance == vect_used_in_outer
                      || relevance == vect_used_in_outer_by_reduction
                      || relevance == vect_used_by_reduction
                      || relevance == vect_unused_in_scope
                      || relevance == vect_used_only_live));

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:

      gcc_assert (PURE_SLP_STMT (stmt_info));

      /* Memory accesses already got their vector type assigned
         in vect_analyze_data_refs.  */
      if (! STMT_VINFO_DATA_REF (stmt_info))
          scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
          if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
                               "get vectype for scalar type: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
              dump_printf (MSG_NOTE, "\n");

          vectype = get_vectype_for_scalar_type (scalar_type);
              if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not SLPed: unsupported data-type ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

          if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
              dump_printf (MSG_NOTE, "\n");

          STMT_VINFO_VECTYPE (stmt_info) = vectype;

  if (STMT_VINFO_RELEVANT_P (stmt_info))
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;

  if (PURE_SLP_STMT (stmt_info) && !node)
      dump_printf_loc (MSG_NOTE, vect_location,
                       "handled only by SLP analysis\n");

        && (STMT_VINFO_RELEVANT_P (stmt_info)
            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_reduction (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
          || vectorizable_comparison (stmt, NULL, NULL, NULL, node));

      ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
            || vectorizable_conversion (stmt, NULL, NULL, node)
            || vectorizable_shift (stmt, NULL, NULL, node)
            || vectorizable_operation (stmt, NULL, NULL, node)
            || vectorizable_assignment (stmt, NULL, NULL, node)
            || vectorizable_load (stmt, NULL, NULL, node, NULL)
            || vectorizable_call (stmt, NULL, NULL, node)
            || vectorizable_store (stmt, NULL, NULL, node)
            || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
            || vectorizable_comparison (stmt, NULL, NULL, NULL, node));

      if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);

  /* Stmts that are (also) "live" (i.e. used outside of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);

      if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);

      if (!STMT_VINFO_LIVE_P (stmt_info))
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
    gcc_assert (!vec_stmt
                && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
             vect_used_in_outer_by_reduction))
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
          stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
          if (STMT_VINFO_LIVE_P (slp_stmt_info)
              && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
              done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
  else if (STMT_VINFO_LIVE_P (stmt_info)
           && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);

    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

vect_remove_stores (gimple *first_stmt)
  gimple *next = first_stmt;
  gimple_stmt_iterator next_si;

      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;

  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;
/* Create a hash table for stmt_vec_info.  */

init_stmt_vec_info_vec (void)
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);


/* Free hash table for stmt_vec_info.  */

free_stmt_vec_info_vec (void)
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
/* Free stmt vectorization related info.  */

free_stmt_vec_info (gimple *stmt)
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                  gimple *seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
                  lhs = gimple_get_lhs (seq_stmt);
                  if (lhs && TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
          free_stmt_vec_info (patt_stmt);

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
  tree orig_scalar_type = scalar_type;
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
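/* For example (illustrative): for a C "bool" element, whose type precision
   is 1 but whose mode (QImode) is 8 bits wide, the element type used for
   the vector is the 8-bit unsigned integer type built just below, not
   "bool" itself.  */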
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
             (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));


unsigned int current_vector_size;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

get_vectype_for_scalar_type (tree scalar_type)
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));


/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by the target.  */

get_mask_type_for_scalar_type (tree scalar_type)
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
                                  current_vector_size);


/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

get_same_sized_vectype (tree scalar_type, tree vector_type)
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
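/* Illustrative example: in

       for (i = 0; i < n; i++)
         a[i] = b[i] * x + 3;

   the operand "x" is a loop invariant (vect_external_def), the literal 3
   is vect_constant_def, and the value loaded from b[i] is defined by the
   current iteration (vect_internal_def); all of these are simple uses.
   By contrast, in a reduction "s += a[i]" the use of "s" is defined by a
   previous iteration and gets a reduction/cycle def type instead.  */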
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt)
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");

  if (CONSTANT_CLASS_P (operand))
      *dt = vect_constant_def;

  if (is_gimple_min_invariant (operand))
      *dt = vect_external_def;

  if (TREE_CODE (operand) != SSA_NAME)
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
      *dt = vect_external_def;

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);

  if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");

  if (*dt == vect_unknown_def_type)
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");

  switch (gimple_code (*def_stmt))
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");


/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
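/* Worked example (illustrative): widening a char vector all the way to int
   needs one intermediate step, so for char->int the function returns with
   *MULTI_STEP_CVT == 1 and INTERM_TYPES containing the short vector type;
   the caller then emits the char->short unpacking followed by the
   short->int unpacking, each step producing two wider vectors per input
   vector.  */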
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
    vect_loop = LOOP_VINFO_LOOP (loop_info);

    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,

          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)

      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;

      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;

      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)

  if (code == FIX_TRUNC_EXPR)
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);

  if (!optab1 || !optab2)

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
          = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                     current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;

  interm_types->release ();
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
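/* Worked example (illustrative): narrowing int to char needs one
   intermediate step, so for int->char the function returns with
   *MULTI_STEP_CVT == 1 and INTERM_TYPES containing the short vector type;
   the caller then packs two int vectors into one short vector and two
   short vectors into one char vector, each pack halving the element
   width.  */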
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;

  *multi_step_cvt = 0;
      c1 = VEC_PACK_TRUNC_EXPR;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;

      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
      enum insn_code icode2;
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
          optab1 = interm_optab;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
          = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                     current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;

  interm_types->release ();