/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type
{
63 /* Return the vectorized type for the given statement. */
66 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
68 return STMT_VINFO_VECTYPE (stmt_info
);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
74 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
76 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
77 basic_block bb
= gimple_bb (stmt
);
78 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
84 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
86 return (bb
->loop_father
== loop
->inner
);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
94 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
95 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
96 int misalign
, enum vect_cost_model_location where
)
100 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
101 stmt_info_for_cost si
= { count
, kind
,
102 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
104 body_cost_vec
->safe_push (si
);
106 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
109 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
110 count
, kind
, stmt_info
, misalign
, where
);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
116 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
118 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
128 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
129 tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
154 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
155 tree array
, unsigned HOST_WIDE_INT n
)
160 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
161 build_int_cst (size_type_node
, n
),
162 NULL_TREE
, NULL_TREE
);
164 new_stmt
= gimple_build_assign (array_ref
, vect
);
165 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
173 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
177 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
190 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
191 enum vect_relevant relevant
, bool live_p
)
193 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
194 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
195 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
196 gimple
*pattern_stmt
;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE
, vect_location
,
201 "mark relevant %d, live %d: ", relevant
, live_p
);
202 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE
, vect_location
,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info
= vinfo_for_stmt (pattern_stmt
);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
224 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
225 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
229 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
230 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
231 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
233 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE
, vect_location
,
238 "already marked relevant/live.\n");
242 worklist
->safe_push (stmt
);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
251 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
257 if (!is_gimple_assign (stmt
))
260 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
262 enum vect_def_type dt
= vect_uninitialized_def
;
264 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
268 "use not simple.\n");
272 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - control stmts in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
291 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
292 enum vect_relevant
*relevant
, bool *live_p
)
294 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
296 imm_use_iterator imm_iter
;
300 *relevant
= vect_unused_in_scope
;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt
)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
306 != loop_exit_ctrl_vec_info_type
)
307 *relevant
= vect_used_in_scope
;
309 /* changing memory. */
310 if (gimple_code (stmt
) != GIMPLE_PHI
)
311 if (gimple_vdef (stmt
)
312 && !gimple_clobber_p (stmt
))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE
, vect_location
,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant
= vect_used_in_scope
;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
323 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
325 basic_block bb
= gimple_bb (USE_STMT (use_p
));
326 if (!flow_bb_inside_loop_p (loop
, bb
))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE
, vect_location
,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p
)))
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
338 gcc_assert (bb
== single_exit (loop
)->dest
);
345 if (*live_p
&& *relevant
== vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE
, vect_location
,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant
= vect_used_only_live
;
354 return (*live_p
|| *relevant
);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
364 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
367 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info
))
375 /* STMT has a data_ref. FORNOW this means that its of one of
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt
))
390 if (is_gimple_call (stmt
)
391 && gimple_call_internal_p (stmt
))
392 switch (gimple_call_internal_fn (stmt
))
395 operand
= gimple_call_arg (stmt
, 3);
400 operand
= gimple_call_arg (stmt
, 2);
410 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
412 operand
= gimple_assign_rhs1 (stmt
);
413 if (TREE_CODE (operand
) != SSA_NAME
)
424 Function process_use.
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT cause it had already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
451 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
452 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
455 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
456 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
457 stmt_vec_info dstmt_vinfo
;
458 basic_block bb
, def_bb
;
460 enum vect_def_type dt
;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
467 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
471 "not vectorized: unsupported use in stmt.\n");
475 if (!def_stmt
|| gimple_nop_p (def_stmt
))
478 def_bb
= gimple_bb (def_stmt
);
479 if (!flow_bb_inside_loop_p (loop
, def_bb
))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
492 bb
= gimple_bb (stmt
);
493 if (gimple_code (stmt
) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
495 && gimple_code (def_stmt
) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
497 && bb
->loop_father
== def_bb
->loop_father
)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE
, vect_location
,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
503 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
517 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE
, vect_location
,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
525 case vect_unused_in_scope
:
526 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
527 vect_used_in_scope
: vect_unused_in_scope
;
530 case vect_used_in_outer_by_reduction
:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
532 relevant
= vect_used_by_reduction
;
535 case vect_used_in_outer
:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
537 relevant
= vect_used_in_scope
;
540 case vect_used_in_scope
:
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE
, vect_location
,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
563 case vect_unused_in_scope
:
564 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
566 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
569 case vect_used_by_reduction
:
570 case vect_used_only_live
:
571 relevant
= vect_used_in_outer_by_reduction
;
574 case vect_used_in_scope
:
575 relevant
= vect_used_in_outer
;
583 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
599 Stmt 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
607 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
608 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
609 unsigned int nbbs
= loop
->num_nodes
;
610 gimple_stmt_iterator si
;
613 stmt_vec_info stmt_vinfo
;
617 enum vect_relevant relevant
;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE
, vect_location
,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec
<gimple
*, 64> worklist
;
625 /* 1. Init worklist. */
626 for (i
= 0; i
< nbbs
; i
++)
629 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
638 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
639 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
641 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
643 stmt
= gsi_stmt (si
);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
650 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
651 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
655 /* 2. Process_worklist */
656 while (worklist
.length () > 0)
661 stmt
= worklist
.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
671 stmt_vinfo
= vinfo_for_stmt (stmt
);
672 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
687 case vect_reduction_def
:
688 gcc_assert (relevant
!= vect_unused_in_scope
);
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_scope
691 && relevant
!= vect_used_by_reduction
692 && relevant
!= vect_used_only_live
)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
696 "unsupported use of reduction.\n");
701 case vect_nested_cycle
:
702 if (relevant
!= vect_unused_in_scope
703 && relevant
!= vect_used_in_outer_by_reduction
704 && relevant
!= vect_used_in_outer
)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
708 "unsupported use of nested cycle.\n");
714 case vect_double_reduction_def
:
715 if (relevant
!= vect_unused_in_scope
716 && relevant
!= vect_used_by_reduction
717 && relevant
!= vect_used_only_live
)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
721 "unsupported use of double reduction.\n");
731 if (is_pattern_stmt_p (stmt_vinfo
))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt
))
738 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
739 tree op
= gimple_assign_rhs1 (stmt
);
742 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
744 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
745 relevant
, &worklist
, false)
746 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
747 relevant
, &worklist
, false))
751 for (; i
< gimple_num_ops (stmt
); i
++)
753 op
= gimple_op (stmt
, i
);
754 if (TREE_CODE (op
) == SSA_NAME
755 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
760 else if (is_gimple_call (stmt
))
762 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
764 tree arg
= gimple_call_arg (stmt
, i
);
765 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
772 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
774 tree op
= USE_FROM_PTR (use_p
);
775 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
782 gather_scatter_info gs_info
;
783 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
785 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
789 } /* while worklist */
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
802 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
803 enum vect_def_type
*dt
,
804 stmt_vector_for_cost
*prologue_cost_vec
,
805 stmt_vector_for_cost
*body_cost_vec
)
808 int inside_cost
= 0, prologue_cost
= 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info
))
814 /* FORNOW: Assuming maximum 2 args per stmts. */
815 for (i
= 0; i
< 2; i
++)
816 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
817 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
818 stmt_info
, 0, vect_prologue
);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
822 stmt_info
, 0, vect_body
);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE
, vect_location
,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
838 enum vect_def_type
*dt
, int pwr
)
841 int inside_cost
= 0, prologue_cost
= 0;
842 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
843 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
844 void *target_cost_data
;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info
))
851 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
853 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
855 for (i
= 0; i
< pwr
+ 1; i
++)
857 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
859 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
860 vec_promote_demote
, stmt_info
, 0,
864 /* FORNOW: Assuming maximum 2 args per stmts. */
865 for (i
= 0; i
< 2; i
++)
866 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
867 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
868 stmt_info
, 0, vect_prologue
);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE
, vect_location
,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
882 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
883 vect_memory_access_type memory_access_type
,
884 enum vect_def_type dt
, slp_tree slp_node
,
885 stmt_vector_for_cost
*prologue_cost_vec
,
886 stmt_vector_for_cost
*body_cost_vec
)
888 unsigned int inside_cost
= 0, prologue_cost
= 0;
889 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
890 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
891 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
893 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
894 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
895 stmt_info
, 0, vect_prologue
);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node
&& grouped_access_p
)
901 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
902 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
915 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
917 /* Uses a high and low interleave or shuffle operations for each
919 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
920 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
921 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
922 stmt_info
, 0, vect_body
);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE
, vect_location
,
926 "vect_model_store_cost: strided group_size = %d .\n",
930 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
931 /* Costs of the stores. */
932 if (memory_access_type
== VMAT_ELEMENTWISE
)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost
+= record_stmt_cost (body_cost_vec
,
935 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
936 scalar_store
, stmt_info
, 0, vect_body
);
938 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
940 if (memory_access_type
== VMAT_ELEMENTWISE
941 || memory_access_type
== VMAT_STRIDED_SLP
)
942 inside_cost
+= record_stmt_cost (body_cost_vec
,
943 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
944 vec_to_scalar
, stmt_info
, 0, vect_body
);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE
, vect_location
,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
953 /* Calculate cost of DR's memory access. */
955 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
956 unsigned int *inside_cost
,
957 stmt_vector_for_cost
*body_cost_vec
)
959 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
960 gimple
*stmt
= DR_STMT (dr
);
961 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
963 switch (alignment_support_scheme
)
967 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
968 vector_store
, stmt_info
, 0,
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE
, vect_location
,
973 "vect_model_store_cost: aligned.\n");
977 case dr_unaligned_supported
:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
981 unaligned_store
, stmt_info
,
982 DR_MISALIGNMENT (dr
), vect_body
);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE
, vect_location
,
985 "vect_model_store_cost: unaligned supported by "
990 case dr_unaligned_unsupported
:
992 *inside_cost
= VECT_MAX_COST
;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
996 "vect_model_store_cost: unsupported access.\n");
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1014 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1015 vect_memory_access_type memory_access_type
,
1017 stmt_vector_for_cost
*prologue_cost_vec
,
1018 stmt_vector_for_cost
*body_cost_vec
)
1020 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1021 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1022 unsigned int inside_cost
= 0, prologue_cost
= 0;
1023 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node
&& grouped_access_p
)
1029 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1030 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1043 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1045 /* Uses an even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1048 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1049 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1050 stmt_info
, 0, vect_body
);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE
, vect_location
,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1058 /* The loads themselves. */
1059 if (memory_access_type
== VMAT_ELEMENTWISE
)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1063 inside_cost
+= record_stmt_cost (body_cost_vec
,
1064 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1065 scalar_load
, stmt_info
, 0, vect_body
);
1068 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1069 &inside_cost
, &prologue_cost
,
1070 prologue_cost_vec
, body_cost_vec
, true);
1071 if (memory_access_type
== VMAT_ELEMENTWISE
1072 || memory_access_type
== VMAT_STRIDED_SLP
)
1073 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1074 stmt_info
, 0, vect_body
);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE
, vect_location
,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1083 /* Calculate cost of DR's memory access. */
/* Parameters, as used by the visible body:
   DR                    - data reference; vect_supportable_dr_alignment (DR, false)
                           selects which cost case applies.
   NCOPIES               - multiplier for the per-copy entries recorded in
                           BODY_COST_VEC.
   ADD_REALIGN_COST,
   RECORD_PROLOGUE_COSTS - both must be true before the
                           dr_explicit_realign_optimized case records its
                           prologue entries.
   INSIDE_COST,
   PROLOGUE_COST         - accumulators that are added to through
                           record_stmt_cost; the dr_unaligned_unsupported case
                           overwrites *INSIDE_COST with VECT_MAX_COST instead.
   PROLOGUE_COST_VEC,
   BODY_COST_VEC         - cost vectors handed to record_stmt_cost.
   NOTE(review): in this copy every statement carries a stray leading numeral
   and the numbering has gaps (no return type, braces, first case label,
   `break' statements or tails of some dump strings are visible), so the
   text looks truncated by extraction -- confirm against a pristine copy.  */
1085 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1086 bool add_realign_cost
, unsigned int *inside_cost
,
1087 unsigned int *prologue_cost
,
1088 stmt_vector_for_cost
*prologue_cost_vec
,
1089 stmt_vector_for_cost
*body_cost_vec
,
1090 bool record_prologue_costs
)
1092 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1093 gimple
*stmt
= DR_STMT (dr
);
1094 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1096 switch (alignment_support_scheme
)
1100 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1101 stmt_info
, 0, vect_body
);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE
, vect_location
,
1105 "vect_model_load_cost: aligned.\n");
1109 case dr_unaligned_supported
:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1113 unaligned_load
, stmt_info
,
1114 DR_MISALIGNMENT (dr
), vect_body
);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE
, vect_location
,
1118 "vect_model_load_cost: unaligned supported by "
/* Explicit realignment: two vector loads and one permute are costed per
   copy.  */
1123 case dr_explicit_realign
:
1125 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1126 vector_load
, stmt_info
, 0, vect_body
);
1127 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1128 vec_perm
, stmt_info
, 0, vect_body
);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1133 if (targetm
.vectorize
.builtin_mask_for_load
)
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1135 stmt_info
, 0, vect_body
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_load_cost: explicit realign\n");
1143 case dr_explicit_realign_optimized
:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_load_cost: unaligned software "
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost
&& record_prologue_costs
)
1159 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1160 vector_stmt
, stmt_info
,
1162 if (targetm
.vectorize
.builtin_mask_for_load
)
1163 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1164 vector_stmt
, stmt_info
,
1168 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1169 stmt_info
, 0, vect_body
);
1170 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1171 stmt_info
, 0, vect_body
);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE
, vect_location
,
1175 "vect_model_load_cost: explicit realign optimized"
/* No usable access scheme: the body cost is overwritten (not incremented)
   with VECT_MAX_COST.  */
1181 case dr_unaligned_unsupported
:
1183 *inside_cost
= VECT_MAX_COST
;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1187 "vect_model_load_cost: unsupported access.\n");
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
/* Three insertion paths are visible:
   - STMT, NEW_STMT and GSI are forwarded unchanged to
     vect_finish_stmt_generation;
   - NEW_STMT is inserted on the edge returned by loop_preheader_edge, and
     gsi_insert_on_edge_immediate is asserted not to have created a block;
   - a bb_vec_info is asserted to exist for STMT and NEW_STMT is inserted
     before the iterator returned by gsi_after_labels for its block.
   When dumping is enabled, NEW_STMT is printed after
   "created new init_stmt: ".
   NOTE(review): the conditions choosing between these paths, and the
   statement controlled by the nested_in_vect_loop_p test, are not visible
   in this copy (the embedded numbering skips them); presumably the paths
   are selected by GSI and LOOP_VINFO being non-null -- confirm.  */
1200 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1203 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1206 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1207 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1211 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1215 if (nested_in_vect_loop_p (loop
, stmt
))
1218 pe
= loop_preheader_edge (loop
);
1219 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1220 gcc_assert (!new_bb
);
1224 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1226 gimple_stmt_iterator gsi_bb_start
;
1228 gcc_assert (bb_vinfo
);
1229 bb
= BB_VINFO_BB (bb_vinfo
);
1230 gsi_bb_start
= gsi_after_labels (bb
);
1231 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE
, vect_location
,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 vector type a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
/* Visible steps:
   1. If TYPE is not trivially convertible from VAL's type, TYPE is asserted
      to be a VECTOR_TYPE.
   2. If VAL's type is also incompatible with TYPE's element type, VAL is
      brought to the element type first:
      - boolean vector and constant VAL: VAL becomes the all-zeros or
        all-ones element constant according to integer_zerop;
      - boolean vector, other path visible: a COND_EXPR assignment choosing
        between those two constants is emitted through vect_init_vector_1;
      - constant VAL otherwise: fold_convert to the element type;
      - remaining path: an assignment to a fresh SSA name, using a
        VIEW_CONVERT_EXPR when VAL's type is not integral and a NOP_EXPR in
        the other visible assignment.
   3. VAL is splatted with build_vector_from_val.
   4. A "cst_" SSA name of TYPE is assigned VAL and emitted through
      vect_init_vector_1 with the caller's GSI.
   NOTE(review): the `else' keywords, the updates of VAL after the emitted
   conversions and the final return are not visible in this copy -- confirm
   against a pristine copy.  */
1254 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1259 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1262 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1263 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1265 /* Scalar boolean value should be transformed into
1266 all zeros or all ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type
))
1269 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1270 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1272 if (CONSTANT_CLASS_P (val
))
1273 val
= integer_zerop (val
) ? false_val
: true_val
;
1276 new_temp
= make_ssa_name (TREE_TYPE (type
));
1277 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1278 val
, true_val
, false_val
);
1279 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1283 else if (CONSTANT_CLASS_P (val
))
1284 val
= fold_convert (TREE_TYPE (type
), val
);
1287 new_temp
= make_ssa_name (TREE_TYPE (type
));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1289 init_stmt
= gimple_build_assign (new_temp
,
1290 fold_build1 (VIEW_CONVERT_EXPR
,
1294 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1295 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1299 val
= build_vector_from_val (type
, val
);
1302 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1303 init_stmt
= gimple_build_assign (new_temp
, val
);
1304 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
/* Visible handling per value of DT:
   - vect_constant_def, vect_external_def: the comment directs callers to
     vect_get_vec_def_for_operand (the statement itself is not visible).
   - vect_internal_def: starts from STMT_VINFO_VEC_STMT of DEF_STMT's info;
     when DEF_STMT is in a pattern and not relevant (the first operand of
     that condition is not visible) the vectorized stmt of
     STMT_VINFO_RELATED_STMT is used instead.  The vectorized stmt is
     asserted non-null and VEC_OPRND is its PHI_RESULT, gimple_call_lhs or
     gimple_assign_lhs depending on its kind.
   - vect_reduction_def, vect_double_reduction_def, vect_nested_cycle: the
     comment directs callers to get_initial_def_for_reduction.
   - vect_induction_def: DEF_STMT is asserted to be a GIMPLE_PHI and
     VEC_OPRND is the PHI_RESULT or gimple_get_lhs of its vectorized stmt.
   NOTE(review): the switch header, return statements and default case are
   not visible in this copy.  */
1314 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1318 stmt_vec_info def_stmt_info
= NULL
;
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def
:
1324 case vect_external_def
:
1325 /* Code should use vect_get_vec_def_for_operand. */
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def
:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1334 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1335 /* Get vectorized pattern statement. */
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1339 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1341 gcc_assert (vec_stmt
);
1342 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1343 vec_oprnd
= PHI_RESULT (vec_stmt
);
1344 else if (is_gimple_call (vec_stmt
))
1345 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1347 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def
:
1353 case vect_double_reduction_def
:
1354 case vect_nested_cycle
:
1355 /* Code should use get_initial_def_for_reduction. */
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def
:
1361 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1365 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1366 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1367 vec_oprnd
= PHI_RESULT (vec_stmt
);
1369 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
/* Implementation outline, from the visible statements:
   - OP is classified by vect_is_simple_use against STMT's loop_vec_info and
     the result is asserted; OP and its defining stmt are dumped when
     dumping is enabled.
   - For vect_constant_def / vect_external_def a vector type is picked:
     VECTYPE on one path (its condition is not visible; presumably when
     VECTYPE is non-null -- confirm), a same-sized truth vector type when OP
     is a scalar boolean and STMT's vectype is a boolean vector, and
     get_vectype_for_scalar_type of OP's type on the last visible path.  The
     choice is asserted non-null and vect_init_vector is called with a NULL
     iterator.
   - Any other def type is delegated to vect_get_vec_def_for_operand_1.  */
1392 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1395 enum vect_def_type dt
;
1397 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1398 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE
, vect_location
,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1405 dump_printf (MSG_NOTE
, "\n");
1408 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1409 gcc_assert (is_simple_use
);
1410 if (def_stmt
&& dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1416 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1418 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1422 vector_type
= vectype
;
1423 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1425 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1427 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1429 gcc_assert (vector_type
);
1430 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1433 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* Implementation summary, from the visible statements: for
   vect_external_def / vect_constant_def the comment says the same def is
   reused (the statement doing so is not visible in this copy).  Otherwise
   the defining stmt of VEC_OPRND is looked up, its stmt_vec_info and its
   STMT_VINFO_RELATED_STMT are both asserted to exist, and VEC_OPRND becomes
   the PHI_RESULT of that related stmt when it is a GIMPLE_PHI, or its
   gimple_get_lhs on the other visible path.
   NOTE(review): the return type, braces and return statements are missing
   from this copy.  */
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1496 gimple
*vec_stmt_for_operand
;
1497 stmt_vec_info def_stmt_info
;
1499 /* Do nothing; can reuse same def. */
1500 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1503 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1504 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1505 gcc_assert (def_stmt_info
);
1506 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1507 gcc_assert (vec_stmt_for_operand
);
1508 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1509 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1511 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* DT[0] is the def type used for VEC_OPRNDS0 and DT[1] the one used for
   VEC_OPRNDS1.  For each vector the last element is popped, replaced by the
   result of vect_get_vec_def_for_stmt_copy and pushed back, so the vector
   length is unchanged.  VEC_OPRNDS0 is popped unconditionally;
   VEC_OPRNDS1 is only updated when it is non-null and non-empty.  */
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1521 vec
<tree
> *vec_oprnds0
,
1522 vec
<tree
> *vec_oprnds1
)
1524 tree vec_oprnd
= vec_oprnds0
->pop ();
1526 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1527 vec_oprnds0
->quick_push (vec_oprnd
);
1529 if (vec_oprnds1
&& vec_oprnds1
->length ())
1531 vec_oprnd
= vec_oprnds1
->pop ();
1532 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1533 vec_oprnds1
->quick_push (vec_oprnd
);
1538 /* Get vectorized definitions for OP0 and OP1.
1539 REDUC_INDEX is the index of reduction operand in case of reduction,
1540 and -1 otherwise. */
/* Two paths are visible:
   - SLP-style path: OP0 (and OP1) are collected into OPS, whose size NOPS is
     1 when OP1 is NULL_TREE and 2 otherwise; vect_get_slp_defs fills
     VEC_DEFS from SLP_NODE and REDUC_INDEX, and the resulting vectors are
     copied into *VEC_OPRNDS0 / *VEC_OPRNDS1.
   - Non-SLP path: each output vector is created with room for one element
     and receives the result of vect_get_vec_def_for_operand for its operand.
   NOTE(review): the condition selecting between the paths and the guards
   around the OP1 statements are not visible in this copy; presumably they
   test SLP_NODE and OP1 respectively -- confirm.  */
1543 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1544 vec
<tree
> *vec_oprnds0
,
1545 vec
<tree
> *vec_oprnds1
,
1546 slp_tree slp_node
, int reduc_index
)
1550 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1551 auto_vec
<tree
> ops (nops
);
1552 auto_vec
<vec
<tree
> > vec_defs (nops
);
1554 ops
.quick_push (op0
);
1556 ops
.quick_push (op1
);
1558 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1560 *vec_oprnds0
= vec_defs
[0];
1562 *vec_oprnds1
= vec_defs
[1];
1568 vec_oprnds0
->create (1);
1569 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1570 vec_oprnds0
->quick_push (vec_oprnd
);
1574 vec_oprnds1
->create (1);
1575 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1576 vec_oprnds1
->quick_push (vec_oprnd
);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
/* STMT is the statement whose location, EH landing pad and vec_info are
   propagated; VEC_STMT is the new statement; GSI is the insertion point
   (VEC_STMT is inserted before it with GSI_SAME_STMT).
   Steps visible in the body:
   1. STMT is asserted not to be a GIMPLE_LABEL.
   2. If *GSI is not at the end and VEC_STMT has memory operands, and the
      statement at *GSI has an SSA_NAME vuse, that vuse is copied onto
      VEC_STMT.  When that statement also has an SSA_NAME vdef and VEC_STMT
      is either an assignment whose lhs is not a gimple register or a call
      without ECF_CONST/ECF_PURE/ECF_NOVOPS, a new vdef is created for
      VEC_STMT and installed as the vuse of the statement at *GSI.
   3. VEC_STMT is inserted, given a fresh stmt_vec_info and dumped if
      dumping is enabled.
   4. VEC_STMT receives STMT's location, and STMT's EH landing pad when
      that number is non-zero and VEC_STMT could throw.  */
1587 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1588 gimple_stmt_iterator
*gsi
)
1590 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1591 vec_info
*vinfo
= stmt_info
->vinfo
;
1593 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1595 if (!gsi_end_p (*gsi
)
1596 && gimple_has_mem_ops (vec_stmt
))
1598 gimple
*at_stmt
= gsi_stmt (*gsi
);
1599 tree vuse
= gimple_vuse (at_stmt
);
1600 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1602 tree vdef
= gimple_vdef (at_stmt
);
1603 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1609 && ((is_gimple_assign (vec_stmt
)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1611 || (is_gimple_call (vec_stmt
)
1612 && !(gimple_call_flags (vec_stmt
)
1613 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1615 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1616 gimple_set_vdef (vec_stmt
, new_vdef
);
1617 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1621 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Give the new statement its own stmt_vec_info inside the same vec_info as
   STMT.  */
1623 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1631 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1637 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1638 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
/* Visible logic: IFN is as_internal_fn (CFN) when CFN is already an internal
   function, and associated_internal_fn (FNDECL) on the other visible path.
   Only direct internal functions whose info is marked vectorizable are
   considered.  Each of the two type slots is VECTYPE_OUT when the
   corresponding info.type0 / info.type1 index is negative and VECTYPE_IN
   otherwise, and the pair is checked with direct_internal_fn_supported_p
   for OPTIMIZE_FOR_SPEED.
   NOTE(review): the declaration of IFN and both return statements are not
   visible in this copy.  */
1647 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1648 tree vectype_out
, tree vectype_in
)
1651 if (internal_fn_p (cfn
))
1652 ifn
= as_internal_fn (cfn
);
1654 ifn
= associated_internal_fn (fndecl
);
1655 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1657 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1658 if (info
.vectorizable
)
1660 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1661 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1662 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1663 OPTIMIZE_FOR_SPEED
))
/* Forward declaration so that permute_vec_elements can be called before its
   definition; in the visible code it is called from
   vectorizable_mask_load_store with two vector operands, a permutation
   mask, the scalar stmt and the insertion iterator.  */
1671 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1672 gimple_stmt_iterator
*);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
/* The step is taken from STMT_VINFO_DR_STEP when STMT has a loop_vec_info
   and is nested in the loop being vectorized; the other visible assignment
   uses DR_STEP of STMT's data reference.  The result is
   tree_int_cst_compare of that step against size_zero_node.
   NOTE(review): the declaration of STEP and the `else' keyword are not
   visible in this copy.  */
1679 compare_step_with_zero (gimple
*stmt
)
1681 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1682 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1684 if (loop_vinfo
&& nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), stmt
))
1685 step
= STMT_VINFO_DR_STEP (stmt_info
);
1687 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
1688 return tree_int_cst_compare (step
, size_zero_node
);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
/* The selector is built in XALLOCAVEC storage as NUNITS - 1, ..., 1, 0,
   where NUNITS is TYPE_VECTOR_SUBPARTS (VECTYPE).  can_vec_perm_p is asked
   whether the mode of VECTYPE supports it before the mask is materialized
   with vect_gen_perm_mask_checked.  The selector elements are
   `unsigned char'.
   NOTE(review): the local declarations and the statement executed when
   can_vec_perm_p fails are not visible in this copy.  */
1695 perm_mask_for_reverse (tree vectype
)
1700 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1701 sel
= XALLOCAVEC (unsigned char, nunits
);
1703 for (i
= 0; i
< nunits
; ++i
)
1704 sel
[i
] = nunits
- 1 - i
;
1706 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
1708 return vect_gen_perm_mask_checked (vectype
, sel
);
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
/* Outline of the visible logic:
   - Group data (first stmt, GROUP_SIZE, GROUP_GAP) is read from the first
     element of STMT's group.  CAN_OVERRUN_P holds only for loads in a loop
     without an inner loop.  Two assertions require GAP == 0 for strided
     accesses and for non-SLP stores.
   - First branch: a strided access uses VMAT_STRIDED_SLP when NUNITS is a
     multiple of GROUP_SIZE and VMAT_ELEMENTWISE on the other visible path.
     A non-strided access sets OVERRUN_P when in a loop with a non-zero
     gap; the dump messages show that a store with such a gap, or an overrun
     that cannot be peeled for, is diagnosed, and the visible result is
     VMAT_CONTIGUOUS.
   - Second branch: starts from VMAT_ELEMENTWISE and, for a non-strided
     access with positive step whose overrun is tolerable, tries
     VMAT_LOAD_STORE_LANES and then VMAT_CONTIGUOUS_PERMUTE, recording the
     would-be overrun in OVERRUN_P.
   - For the leader of a store group, the rhs of the following group
     members is checked with vect_is_simple_use.
   - When an overrun was accepted, CAN_OVERRUN_P is asserted and
     LOOP_VINFO_PEELING_FOR_GAPS is set.
   NOTE(review): the condition separating the two branches (presumably SLP),
   the loop over the group and all return statements are not visible in
   this copy -- confirm against a pristine copy.  */
1721 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1722 vec_load_store_type vls_type
,
1723 vect_memory_access_type
*memory_access_type
)
1725 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1726 vec_info
*vinfo
= stmt_info
->vinfo
;
1727 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1728 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1729 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1730 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1731 bool single_element_p
= (stmt
== first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info
));
1733 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1734 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p
= false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p
= (vls_type
== VLS_LOAD
&& loop_vinfo
&& !loop
->inner
);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
1753 if (STMT_VINFO_STRIDED_P (stmt_info
))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits
% group_size
== 0)
1759 *memory_access_type
= VMAT_STRIDED_SLP
;
1761 *memory_access_type
= VMAT_ELEMENTWISE
;
1765 overrun_p
= loop_vinfo
&& gap
!= 0;
1766 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1773 /* If the access is aligned an overrun is fine. */
1776 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
))))
1778 if (overrun_p
&& !can_overrun_p
)
1780 if (dump_enabled_p ())
1781 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1782 "Peeling for outer loop is not supported\n");
1785 *memory_access_type
= VMAT_CONTIGUOUS
;
1790 /* We can always handle this case using elementwise accesses,
1791 but see if something more efficient is available. */
1792 *memory_access_type
= VMAT_ELEMENTWISE
;
1794 /* If there is a gap at the end of the group then these optimizations
1795 would access excess elements in the last iteration. */
1796 bool would_overrun_p
= (gap
!= 0);
1797 /* If the access is aligned an overrun is fine. */
1799 && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info
)))
1800 would_overrun_p
= false;
1801 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1802 && (can_overrun_p
|| !would_overrun_p
)
1803 && compare_step_with_zero (stmt
) > 0)
1805 /* First try using LOAD/STORE_LANES. */
1806 if (vls_type
== VLS_LOAD
1807 ? vect_load_lanes_supported (vectype
, group_size
)
1808 : vect_store_lanes_supported (vectype
, group_size
))
1810 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
1811 overrun_p
= would_overrun_p
;
1814 /* If that fails, try using permuting loads. */
1815 if (*memory_access_type
== VMAT_ELEMENTWISE
1816 && (vls_type
== VLS_LOAD
1817 ? vect_grouped_load_supported (vectype
, single_element_p
,
1819 : vect_grouped_store_supported (vectype
, group_size
)))
1821 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
1822 overrun_p
= would_overrun_p
;
1827 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
1829 /* STMT is the leader of the group. Check the operands of all the
1830 stmts of the group. */
1831 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
1834 gcc_assert (gimple_assign_single_p (next_stmt
));
1835 tree op
= gimple_assign_rhs1 (next_stmt
);
1837 enum vect_def_type dt
;
1838 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1842 "use not simple.\n");
1845 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
1851 gcc_assert (can_overrun_p
);
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1854 "Data access with gaps requires scalar "
/* Record on the loop that peeling for gaps is required.  */
1856 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
1862 /* A subroutine of get_load_store_type, with a subset of the same
1863 arguments. Handle the case where STMT is a load or store that
1864 accesses consecutive elements with a negative step. */
/* Results visible in the body, in order of testing:
   - VMAT_ELEMENTWISE with the dump "multiple types with negative step."
     (the guarding condition is not visible in this copy; presumably it
     tests NCOPIES -- confirm);
   - VMAT_ELEMENTWISE when vect_supportable_dr_alignment reports anything
     other than dr_aligned or dr_unaligned_supported;
   - VMAT_CONTIGUOUS_DOWN for VLS_STORE_INVARIANT, where no permute is
     needed;
   - VMAT_ELEMENTWISE when perm_mask_for_reverse (VECTYPE) yields no mask;
   - VMAT_CONTIGUOUS_REVERSE otherwise.  */
1866 static vect_memory_access_type
1867 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1868 vec_load_store_type vls_type
,
1869 unsigned int ncopies
)
1871 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1872 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1873 dr_alignment_support alignment_support_scheme
;
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1879 "multiple types with negative step.\n");
1880 return VMAT_ELEMENTWISE
;
1883 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1884 if (alignment_support_scheme
!= dr_aligned
1885 && alignment_support_scheme
!= dr_unaligned_supported
)
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1889 "negative step but alignment required.\n");
1890 return VMAT_ELEMENTWISE
;
1893 if (vls_type
== VLS_STORE_INVARIANT
)
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE
, vect_location
,
1897 "negative step with invariant source;"
1898 " no permute needed.\n");
1899 return VMAT_CONTIGUOUS_DOWN
;
1902 if (!perm_mask_for_reverse (vectype
))
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1906 "negative step and reversing not supported.\n");
1907 return VMAT_ELEMENTWISE
;
1910 return VMAT_CONTIGUOUS_REVERSE
;
1913 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1914 if there is a memory access type that the vectorized form can use,
1915 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1916 or scatters, fill in GS_INFO accordingly.
1918 SLP says whether we're performing SLP rather than loop vectorization.
1919 VECTYPE is the vector type that the vectorized statements will use.
1920 NCOPIES is the number of vector statements that will be needed. */
/* Classification order visible in the body:
   1. Gather/scatter statements get VMAT_GATHER_SCATTER; GS_INFO is filled by
      vect_check_gather_scatter and its offset must be a simple use,
      otherwise "gather"/"scatter" "index use not simple" is dumped.
   2. Grouped accesses are delegated to get_group_load_store_type.
   3. Strided accesses get VMAT_ELEMENTWISE.
   4. Otherwise the result of compare_step_with_zero selects between
      get_negative_load_store_type, VMAT_INVARIANT (asserted to be a load)
      and VMAT_CONTIGUOUS; the comparisons on CMP themselves are not visible
      in this copy.
   Finally, VMAT_ELEMENTWISE for a non-strided statement is diagnosed with
   "not falling back to elementwise accesses".
   NOTE(review): no return statement is visible in this copy; presumably
   each diagnosed failure returns false and the end returns true, as the
   header comment describes -- confirm.  */
1923 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1924 vec_load_store_type vls_type
, unsigned int ncopies
,
1925 vect_memory_access_type
*memory_access_type
,
1926 gather_scatter_info
*gs_info
)
1928 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1929 vec_info
*vinfo
= stmt_info
->vinfo
;
1930 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1931 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1933 *memory_access_type
= VMAT_GATHER_SCATTER
;
1935 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1937 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1938 &gs_info
->offset_dt
,
1939 &gs_info
->offset_vectype
))
1941 if (dump_enabled_p ())
1942 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1943 "%s index use not simple.\n",
1944 vls_type
== VLS_LOAD
? "gather" : "scatter");
1948 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1950 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1951 memory_access_type
))
1954 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1957 *memory_access_type
= VMAT_ELEMENTWISE
;
1961 int cmp
= compare_step_with_zero (stmt
);
1963 *memory_access_type
= get_negative_load_store_type
1964 (stmt
, vectype
, vls_type
, ncopies
);
1967 gcc_assert (vls_type
== VLS_LOAD
);
1968 *memory_access_type
= VMAT_INVARIANT
;
1971 *memory_access_type
= VMAT_CONTIGUOUS
;
1974 /* FIXME: At the moment the cost model seems to underestimate the
1975 cost of using elementwise accesses. This check preserves the
1976 traditional behavior until that can be fixed. */
1977 if (*memory_access_type
== VMAT_ELEMENTWISE
1978 && !STMT_VINFO_STRIDED_P (stmt_info
))
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1982 "not falling back to elementwise accesses\n");
1988 /* Function vectorizable_mask_load_store.
1990 Check if STMT performs a conditional load or store that can be vectorized.
1991 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1992 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1993 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1996 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1997 gimple
**vec_stmt
, slp_tree slp_node
)
1999 tree vec_dest
= NULL
;
2000 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2001 stmt_vec_info prev_stmt_info
;
2002 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2003 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2004 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2005 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2006 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2007 tree rhs_vectype
= NULL_TREE
;
2012 tree dataref_ptr
= NULL_TREE
;
2014 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2018 gather_scatter_info gs_info
;
2019 vec_load_store_type vls_type
;
2022 enum vect_def_type dt
;
2024 if (slp_node
!= NULL
)
2027 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2028 gcc_assert (ncopies
>= 1);
2030 mask
= gimple_call_arg (stmt
, 2);
2032 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2035 /* FORNOW. This restriction should be relaxed. */
2036 if (nested_in_vect_loop
&& ncopies
> 1)
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2040 "multiple types in nested loop.");
2044 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2047 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2051 if (!STMT_VINFO_DATA_REF (stmt_info
))
2054 elem_type
= TREE_TYPE (vectype
);
2056 if (TREE_CODE (mask
) != SSA_NAME
)
2059 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2063 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2065 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2066 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2069 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2071 tree rhs
= gimple_call_arg (stmt
, 3);
2072 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2074 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2075 vls_type
= VLS_STORE_INVARIANT
;
2077 vls_type
= VLS_STORE
;
2080 vls_type
= VLS_LOAD
;
2082 vect_memory_access_type memory_access_type
;
2083 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2084 &memory_access_type
, &gs_info
))
2087 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2089 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2091 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2092 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2094 if (dump_enabled_p ())
2095 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2096 "masked gather with integer mask not supported.");
2100 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2102 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2104 "unsupported access type for masked %s.\n",
2105 vls_type
== VLS_LOAD
? "load" : "store");
2108 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2109 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2110 TYPE_MODE (mask_vectype
),
2111 vls_type
== VLS_LOAD
)
2113 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2116 if (!vec_stmt
) /* transformation not required. */
2118 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2119 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2120 if (vls_type
== VLS_LOAD
)
2121 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2124 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2125 dt
, NULL
, NULL
, NULL
);
2128 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2132 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2134 tree vec_oprnd0
= NULL_TREE
, op
;
2135 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2136 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2137 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2138 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2139 tree mask_perm_mask
= NULL_TREE
;
2140 edge pe
= loop_preheader_edge (loop
);
2143 enum { NARROW
, NONE
, WIDEN
} modifier
;
2144 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2146 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2147 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2148 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2149 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2150 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2151 scaletype
= TREE_VALUE (arglist
);
2152 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2153 && types_compatible_p (srctype
, masktype
));
2155 if (nunits
== gather_off_nunits
)
2157 else if (nunits
== gather_off_nunits
/ 2)
2159 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
2162 for (i
= 0; i
< gather_off_nunits
; ++i
)
2163 sel
[i
] = i
| nunits
;
2165 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2167 else if (nunits
== gather_off_nunits
* 2)
2169 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
2172 for (i
= 0; i
< nunits
; ++i
)
2173 sel
[i
] = i
< gather_off_nunits
2174 ? i
: i
+ nunits
- gather_off_nunits
;
2176 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2178 for (i
= 0; i
< nunits
; ++i
)
2179 sel
[i
] = i
| gather_off_nunits
;
2180 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2185 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2187 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2188 if (!is_gimple_min_invariant (ptr
))
2190 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2191 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2192 gcc_assert (!new_bb
);
2195 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2197 prev_stmt_info
= NULL
;
2198 for (j
= 0; j
< ncopies
; ++j
)
2200 if (modifier
== WIDEN
&& (j
& 1))
2201 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2202 perm_mask
, stmt
, gsi
);
2205 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2208 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2210 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2212 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2213 == TYPE_VECTOR_SUBPARTS (idxtype
));
2214 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2215 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2217 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2218 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2222 if (mask_perm_mask
&& (j
& 1))
2223 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2224 mask_perm_mask
, stmt
, gsi
);
2228 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2231 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2232 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2236 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2238 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2239 == TYPE_VECTOR_SUBPARTS (masktype
));
2240 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2241 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2243 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2244 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2250 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2253 if (!useless_type_conversion_p (vectype
, rettype
))
2255 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2256 == TYPE_VECTOR_SUBPARTS (rettype
));
2257 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2258 gimple_call_set_lhs (new_stmt
, op
);
2259 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2260 var
= make_ssa_name (vec_dest
);
2261 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2262 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2266 var
= make_ssa_name (vec_dest
, new_stmt
);
2267 gimple_call_set_lhs (new_stmt
, var
);
2270 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2272 if (modifier
== NARROW
)
2279 var
= permute_vec_elements (prev_res
, var
,
2280 perm_mask
, stmt
, gsi
);
2281 new_stmt
= SSA_NAME_DEF_STMT (var
);
2284 if (prev_stmt_info
== NULL
)
2285 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2287 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2288 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2291 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed from the IL. */
2293 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2295 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2296 stmt_info
= vinfo_for_stmt (stmt
);
2298 tree lhs
= gimple_call_lhs (stmt
);
2299 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2300 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2301 set_vinfo_for_stmt (stmt
, NULL
);
2302 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2303 gsi_replace (gsi
, new_stmt
, true);
2306 else if (vls_type
!= VLS_LOAD
)
2308 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2309 prev_stmt_info
= NULL
;
2310 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2311 for (i
= 0; i
< ncopies
; i
++)
2313 unsigned align
, misalign
;
2317 tree rhs
= gimple_call_arg (stmt
, 3);
2318 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2319 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2320 /* We should have caught mismatched types earlier. */
2321 gcc_assert (useless_type_conversion_p (vectype
,
2322 TREE_TYPE (vec_rhs
)));
2323 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2324 NULL_TREE
, &dummy
, gsi
,
2325 &ptr_incr
, false, &inv_p
);
2326 gcc_assert (!inv_p
);
2330 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2331 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2332 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2333 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2334 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2335 TYPE_SIZE_UNIT (vectype
));
2338 align
= TYPE_ALIGN_UNIT (vectype
);
2339 if (aligned_access_p (dr
))
2341 else if (DR_MISALIGNMENT (dr
) == -1)
2343 align
= TYPE_ALIGN_UNIT (elem_type
);
2347 misalign
= DR_MISALIGNMENT (dr
);
2348 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2350 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2351 misalign
? least_bit_hwi (misalign
) : align
);
2353 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2354 ptr
, vec_mask
, vec_rhs
);
2355 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2357 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2359 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2360 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2365 tree vec_mask
= NULL_TREE
;
2366 prev_stmt_info
= NULL
;
2367 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2368 for (i
= 0; i
< ncopies
; i
++)
2370 unsigned align
, misalign
;
2374 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2375 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2376 NULL_TREE
, &dummy
, gsi
,
2377 &ptr_incr
, false, &inv_p
);
2378 gcc_assert (!inv_p
);
2382 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2383 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2384 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2385 TYPE_SIZE_UNIT (vectype
));
2388 align
= TYPE_ALIGN_UNIT (vectype
);
2389 if (aligned_access_p (dr
))
2391 else if (DR_MISALIGNMENT (dr
) == -1)
2393 align
= TYPE_ALIGN_UNIT (elem_type
);
2397 misalign
= DR_MISALIGNMENT (dr
);
2398 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2400 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2401 misalign
? least_bit_hwi (misalign
) : align
);
2403 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2405 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2406 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2408 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2410 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2411 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2415 if (vls_type
== VLS_LOAD
)
2417 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed from the IL. */
2419 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2421 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2422 stmt_info
= vinfo_for_stmt (stmt
);
2424 tree lhs
= gimple_call_lhs (stmt
);
2425 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2426 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2427 set_vinfo_for_stmt (stmt
, NULL
);
2428 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2429 gsi_replace (gsi
, new_stmt
, true);
2435 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.

   STMT is the scalar call; its argument 0 is the value whose bytes are
   swapped.  GSI is the iterator handed to vect_finish_stmt_generation,
   VEC_STMT receives the generated statement, SLP_NODE is forwarded to
   vect_get_vec_defs and collects statements through SLP_TREE_VEC_STMTS,
   VECTYPE_IN is the vector type of the argument and DT is forwarded to
   vect_get_vec_defs_for_stmt_copy.

   The swap is expressed as a byte permutation.  A char vector type of
   the same size as VECTYPE_IN is obtained, and a selector is filled in
   which, for element I of the NUNITS elements, the WORD_BYTES byte
   indices of that element appear in descending order
   ((I + 1) * WORD_BYTES - J - 1).  can_vec_perm_p is queried for that
   selector on the char vector mode.

   The statement is recorded as call_vec_info_type; unless it is a pure
   SLP statement, one vector_stmt prologue cost and NCOPIES vec_perm body
   costs are added.

   For code generation the selector is turned into the constant
   BSWAP_VCONST, and every vector operand is VIEW_CONVERTed to the char
   vector type, permuted with itself through VEC_PERM_EXPR, and
   converted into an SSA name of type VECTYPE.

   NOTE(review): this copy skips source lines (the embedded line numbers
   jump, e.g. 2452 -> 2457).  The return type, braces, the conditions
   separating analysis from transformation and SLP from non-SLP, and all
   return statements are not visible, and the "Multiple types in SLP"
   comment below has lost its terminator.  Restore those from the
   upstream file before relying on this text.  */
2438 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2439 gimple
**vec_stmt
, slp_tree slp_node
,
2440 tree vectype_in
, enum vect_def_type
*dt
)
2443 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2444 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2445 unsigned ncopies
, nunits
;
/* The single call argument is the value to byte-swap.  */
2447 op
= gimple_call_arg (stmt
, 0);
/* NUNITS is the element count of the statement's own vector type.  */
2448 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2449 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2451 /* Multiple types in SLP are handled by creating the appropriate number of
2452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2457 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2459 gcc_assert (ncopies
>= 1);
2461 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2466 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype
));
2467 unsigned char *elt
= elts
;
2468 unsigned word_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
) / nunits
;
2469 for (unsigned i
= 0; i
< nunits
; ++i
)
2470 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2471 *elt
++ = (i
+ 1) * word_bytes
- j
- 1;
2473 if (! can_vec_perm_p (TYPE_MODE (char_vectype
), false, elts
))
2478 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2479 if (dump_enabled_p ())
2480 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2482 if (! PURE_SLP_STMT (stmt_info
))
2484 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2485 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2486 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2487 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2492 tree
*telts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (char_vectype
));
2493 for (unsigned i
= 0; i
< TYPE_VECTOR_SUBPARTS (char_vectype
); ++i
)
2494 telts
[i
] = build_int_cst (char_type_node
, elts
[i
]);
2495 tree bswap_vconst
= build_vector (char_vectype
, telts
);
2498 vec
<tree
> vec_oprnds
= vNULL
;
2499 gimple
*new_stmt
= NULL
;
2500 stmt_vec_info prev_stmt_info
= NULL
;
2501 for (unsigned j
= 0; j
< ncopies
; j
++)
2505 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2507 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2509 /* Arguments are ready. Create the new vector stmt. */
2512 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* Step 1: reinterpret the operand as a vector of chars.  */
2514 tree tem
= make_ssa_name (char_vectype
);
2515 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2516 char_vectype
, vop
));
2517 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Step 2: permute the bytes of TEM with itself using the constant
   selector built above.  */
2518 tree tem2
= make_ssa_name (char_vectype
);
2519 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2520 tem
, tem
, bswap_vconst
);
2521 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Step 3: the final VIEW_CONVERT_EXPR is assigned to a fresh SSA name
   of type VECTYPE (its operands are on a line missing from this copy).  */
2522 tem
= make_ssa_name (vectype
);
2523 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2525 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Collect the statement on the SLP node (the guarding condition is not
   visible in this copy).  */
2527 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Record NEW_STMT as the vectorized statement and hand it back through
   *VEC_STMT, or chain it after the previous copy; the condition choosing
   between the two is not visible in this copy.  */
2534 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2536 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2538 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the operand vector.  */
2541 vec_oprnds
.release ();
2545 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2546 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2547 in a single step. On success, store the binary pack code in *CONVERT_CODE. */
2551 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2552 tree_code
*convert_code
)
/* Both vector types are tested for integral element types.
   NOTE(review): the statement executed when this test fires is on a
   line missing from this copy.  */
2554 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2555 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
/* Ask supportable_narrowing_operation about a NOP_EXPR narrowing from
   VECTYPE_IN to VECTYPE_OUT.  The operation code and the step count are
   returned through CODE and MULTI_STEP_CVT; INTERM_TYPES is presumably
   passed on the missing line to receive intermediate types.
   NOTE(review): the rest of this condition (including any test of
   MULTI_STEP_CVT) and the failure path are not visible in this copy.  */
2559 int multi_step_cvt
= 0;
2560 auto_vec
<tree
, 8> interm_types
;
2561 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2562 &code
, &multi_step_cvt
,
/* Hand the code found above back to the caller.  */
2567 *convert_code
= code
;
2571 /* Function vectorizable_call.
2573 Check if GS performs a function call that can be vectorized.
2574 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2575 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2576 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Outline of the logic visible below:

   - IFN_MASK_LOAD and IFN_MASK_STORE calls are handed to
     vectorizable_mask_load_store.
   - The lhs is tested for being absent or not an SSA_NAME, the argument
     count for being 0 or above 3, and the call for having a virtual use.
   - Each argument is checked for a compatible scalar type, a simple use
     and a matching operand vector type; for IFN_GOMP_SIMD_LANE the
     argument is ignored and RHS_TYPE is unsigned_type_node.
   - The element counts of VECTYPE_IN and VECTYPE_OUT are compared three
     ways; this presumably selects MODIFIER (NARROW, NONE or WIDEN), but
     the assignments are on lines missing from this copy.
   - An internal function is looked up first, for MODIFIER == NONE or for
     a NARROW case accepted by simple_integer_narrowing.  If that gives
     IFN_LAST, the target hooks builtin_vectorized_function and
     builtin_md_vectorized_function are consulted.  With neither
     available, CFN_GOMP_SIMD_LANE and, for MODIFIER == NONE, the
     bswap16/32/64 built-ins (via vectorizable_bswap) are still handled.
   - Without VEC_STMT only the statement type and costs are recorded.
   - Otherwise the vector calls are generated and the scalar call is
     replaced by an assignment of zero to its lhs.

   NOTE(review): the embedded line numbers jump throughout, so the
   return type, braces, else arms, several conditions and every return
   statement are not visible here; check the upstream file before
   relying on details.  */
2579 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2586 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2587 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2588 tree vectype_out
, vectype_in
;
2591 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2592 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2593 vec_info
*vinfo
= stmt_info
->vinfo
;
2594 tree fndecl
, new_temp
, rhs_type
;
2596 enum vect_def_type dt
[3]
2597 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2598 gimple
*new_stmt
= NULL
;
2600 vec
<tree
> vargs
= vNULL
;
2601 enum { NARROW
, NONE
, WIDEN
} modifier
;
2605 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2608 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2612 /* Is GS a vectorizable call? */
2613 stmt
= dyn_cast
<gcall
*> (gs
);
/* Masked loads and stores are delegated to their dedicated routine.  */
2617 if (gimple_call_internal_p (stmt
)
2618 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2619 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2620 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
/* Test for a missing or non-SSA_NAME lhs.  */
2623 if (gimple_call_lhs (stmt
) == NULL_TREE
2624 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2627 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2629 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2631 /* Process function arguments. */
2632 rhs_type
= NULL_TREE
;
2633 vectype_in
= NULL_TREE
;
2634 nargs
= gimple_call_num_args (stmt
);
2636 /* Bail out if the function has more than three arguments, we do not have
2637 interesting builtin functions to vectorize with more than two arguments
2638 except for fma. No arguments is also not good. */
2639 if (nargs
== 0 || nargs
> 3)
2642 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2643 if (gimple_call_internal_p (stmt
)
2644 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2647 rhs_type
= unsigned_type_node
;
/* Walk the arguments, checking in turn the scalar type, that the use is
   simple (which also fills DT[i] and OPVECTYPE), and the operand vector
   type; the dump messages name each failure.  */
2650 for (i
= 0; i
< nargs
; i
++)
2654 op
= gimple_call_arg (stmt
, i
);
2656 /* We can only handle calls with arguments of the same type. */
2658 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2662 "argument types differ.\n");
2666 rhs_type
= TREE_TYPE (op
);
2668 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2670 if (dump_enabled_p ())
2671 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2672 "use not simple.\n");
2677 vectype_in
= opvectype
;
2679 && opvectype
!= vectype_in
)
2681 if (dump_enabled_p ())
2682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2683 "argument vector types differ.\n");
2687 /* If all arguments are external or constant defs use a vector type with
2688 the same size as the output vector type. */
2690 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2692 gcc_assert (vectype_in
);
2695 if (dump_enabled_p ())
2697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2698 "no vectype for scalar type ");
2699 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2700 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify the call by comparing the element counts of the input and
   output vector types (the resulting assignments are not visible in
   this copy).  */
2707 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2708 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2709 if (nunits_in
== nunits_out
/ 2)
2711 else if (nunits_out
== nunits_in
)
2713 else if (nunits_out
== nunits_in
/ 2)
2718 /* We only handle functions that do not read or clobber memory. */
2719 if (gimple_vuse (stmt
))
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2723 "function reads from or writes to memory.\n");
2727 /* For now, we only vectorize functions if a target specific builtin
2728 is available. TODO -- in some cases, it might be profitable to
2729 insert the calls for pieces of the vector, in order to be able
2730 to vectorize other operations in the loop. */
2732 internal_fn ifn
= IFN_LAST
;
2733 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2734 tree callee
= gimple_call_fndecl (stmt
);
2736 /* First try using an internal function. */
2737 tree_code convert_code
= ERROR_MARK
;
2739 && (modifier
== NONE
2740 || (modifier
== NARROW
2741 && simple_integer_narrowing (vectype_out
, vectype_in
,
2743 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2746 /* If that fails, try asking for a target-specific built-in function. */
2747 if (ifn
== IFN_LAST
)
2749 if (cfn
!= CFN_LAST
)
2750 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2751 (cfn
, vectype_out
, vectype_in
);
2753 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2754 (callee
, vectype_out
, vectype_in
);
/* Neither an internal function nor a target built-in was found: only
   the special cases below remain.  */
2757 if (ifn
== IFN_LAST
&& !fndecl
)
2759 if (cfn
== CFN_GOMP_SIMD_LANE
2762 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2763 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2764 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2765 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2767 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2768 { 0, 1, 2, ... vf - 1 } vector. */
2769 gcc_assert (nargs
== 0);
2771 else if (modifier
== NONE
2772 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2773 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2774 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2775 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2779 if (dump_enabled_p ())
2780 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2781 "function is not vectorizable.\n");
/* NCOPIES is the vectorization factor divided by the element count of
   the output vector type (narrowing without an internal function) or of
   the input vector type.  The first arm of this chain is not visible in
   this copy.  */
2788 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2789 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2791 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2793 /* Sanity check: make sure that at least one copy of the vectorized stmt
2794 needs to be generated. */
2795 gcc_assert (ncopies
>= 1);
2797 if (!vec_stmt
) /* transformation not required. */
2799 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2800 if (dump_enabled_p ())
2801 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2803 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Narrowing through an internal function additionally costs NCOPIES / 2
   vec_promote_demote operations outside SLP.  */
2804 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2805 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2806 vec_promote_demote
, stmt_info
, 0, vect_body
);
2813 if (dump_enabled_p ())
2814 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2817 scalar_dest
= gimple_call_lhs (stmt
);
2818 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2820 prev_stmt_info
= NULL
;
/* First strategy: same-width calls, and every call implemented by an
   internal function.  When MODIFIER is NARROW here, each internal call
   yields a half result and two halves are packed with CONVERT_CODE.  */
2821 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2823 tree prev_res
= NULL_TREE
;
2824 for (j
= 0; j
< ncopies
; ++j
)
2826 /* Build argument list for the vectorized call. */
2828 vargs
.create (nargs
);
/* SLP: collect the scalar arguments and fetch all of their vector
   definitions at once through vect_get_slp_defs.  */
2834 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2835 vec
<tree
> vec_oprnds0
;
2837 for (i
= 0; i
< nargs
; i
++)
2838 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2839 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2840 vec_oprnds0
= vec_defs
[0];
2842 /* Arguments are ready. Create the new vector stmt. */
2843 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2846 for (k
= 0; k
< nargs
; k
++)
2848 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2849 vargs
[k
] = vec_oprndsk
[i
];
2851 if (modifier
== NARROW
)
/* The internal call produces a VECTYPE_IN-typed half result; it is
   either remembered in PREV_RES or packed with the remembered half
   (the test choosing between the two is not visible in this copy).  */
2853 tree half_res
= make_ssa_name (vectype_in
);
2854 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2855 gimple_call_set_lhs (new_stmt
, half_res
);
2856 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2859 prev_res
= half_res
;
2862 new_temp
= make_ssa_name (vec_dest
);
2863 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2864 prev_res
, half_res
);
2868 if (ifn
!= IFN_LAST
)
2869 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2871 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2872 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2873 gimple_call_set_lhs (new_stmt
, new_temp
);
2875 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2876 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2879 for (i
= 0; i
< nargs
; i
++)
2881 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2882 vec_oprndsi
.release ();
/* Non-SLP: each operand's vector def comes either from
   vect_get_vec_def_for_operand or, starting from the matching argument
   of the previously generated call, from vect_get_vec_def_for_stmt_copy
   (presumably first copy versus later copies; the test is not visible
   in this copy).  */
2887 for (i
= 0; i
< nargs
; i
++)
2889 op
= gimple_call_arg (stmt
, i
);
2892 = vect_get_vec_def_for_operand (op
, stmt
);
2895 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2897 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2900 vargs
.quick_push (vec_oprnd0
);
2903 if (gimple_call_internal_p (stmt
)
2904 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
/* IFN_GOMP_SIMD_LANE: copy J is the constant vector whose element K is
   J * NUNITS_OUT + K, initialised through vect_init_vector_1 and then
   assigned to the result name.  */
2906 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2908 for (k
= 0; k
< nunits_out
; ++k
)
2909 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2910 tree cst
= build_vector (vectype_out
, v
);
2912 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2913 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2914 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2915 new_temp
= make_ssa_name (vec_dest
);
2916 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2918 else if (modifier
== NARROW
)
2920 tree half_res
= make_ssa_name (vectype_in
);
2921 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2922 gimple_call_set_lhs (new_stmt
, half_res
);
2923 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2926 prev_res
= half_res
;
2929 new_temp
= make_ssa_name (vec_dest
);
2930 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2931 prev_res
, half_res
);
2935 if (ifn
!= IFN_LAST
)
2936 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2938 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2939 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2940 gimple_call_set_lhs (new_stmt
, new_temp
);
2942 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* The statement recorded as the vectorized form is the one generated at
   J == 1 when narrowing (where two calls feed one packed result) and at
   J == 0 otherwise; other copies are chained via
   STMT_VINFO_RELATED_STMT.  */
2944 if (j
== (modifier
== NARROW
? 1 : 0))
2945 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2947 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2949 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Second strategy: narrowing through a target built-in (IFN == IFN_LAST
   here).  Each vectorized call receives two vector operands per scalar
   argument, hence the NARGS * 2 capacity.  */
2952 else if (modifier
== NARROW
)
2954 for (j
= 0; j
< ncopies
; ++j
)
2956 /* Build argument list for the vectorized call. */
2958 vargs
.create (nargs
* 2);
2964 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2965 vec
<tree
> vec_oprnds0
;
2967 for (i
= 0; i
< nargs
; i
++)
2968 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2969 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2970 vec_oprnds0
= vec_defs
[0];
2972 /* Arguments are ready. Create the new vector stmt. */
/* The SLP defs are consumed two at a time (I advances by 2).  */
2973 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2977 for (k
= 0; k
< nargs
; k
++)
2979 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2980 vargs
.quick_push (vec_oprndsk
[i
]);
2981 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2983 if (ifn
!= IFN_LAST
)
2984 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2986 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2987 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2988 gimple_call_set_lhs (new_stmt
, new_temp
);
2989 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2990 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2993 for (i
= 0; i
< nargs
; i
++)
2995 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2996 vec_oprndsi
.release ();
3001 for (i
= 0; i
< nargs
; i
++)
3003 op
= gimple_call_arg (stmt
, i
);
3007 = vect_get_vec_def_for_operand (op
, stmt
);
3009 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
/* Continue from the second operand of the pair (argument 2*I + 1) used
   by the previously generated call.  */
3013 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3015 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3017 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3020 vargs
.quick_push (vec_oprnd0
);
3021 vargs
.quick_push (vec_oprnd1
);
3024 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3025 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3026 gimple_call_set_lhs (new_stmt
, new_temp
);
3027 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3030 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3032 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3034 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3037 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3040 /* No current target implements this case. */
3045 /* The call in STMT might prevent it from being removed in dce.
3046 We however cannot remove it here, due to the way the ssa name
3047 it defines is mapped to the new definition. So just replace
3048 rhs of the statement with something harmless. */
3053 type
= TREE_TYPE (scalar_dest
);
/* For a pattern statement the lhs is taken from the related statement
   rather than from STMT itself.  */
3054 if (is_pattern_stmt_p (stmt_info
))
3055 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3057 lhs
= gimple_call_lhs (stmt
);
/* Build LHS = 0, move STMT_INFO from the call to the new assignment,
   and replace the call in place.  */
3059 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3060 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3061 set_vinfo_for_stmt (stmt
, NULL
);
3062 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3063 gsi_replace (gsi
, new_stmt
, false);
/* Per-argument information gathered by vectorizable_simd_clone_call.
   NOTE(review): the braces and some members are on lines missing from
   this copy; code later in the file also accesses .vectype, .op and
   .align on objects of this type.  */
3069 struct simd_call_arg_info
/* Definition kind of the argument, as filled in by vect_is_simple_use.  */
3073 enum vect_def_type dt
;
/* Step of a linear argument; callers initialise it to 0 and test it for
   non-zero.  */
3074 HOST_WIDE_INT linear_step
;
/* Set to true by vect_simd_lane_linear when it records a step.  */
3076 bool simd_lane_linear
;
3079 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3080 is linear within simd lane (but not within whole loop), note it in *ARGINFO. */
/* Walk the definition of OP looking for the shape BASE p+ V, where BASE
   is invariant and V is derived from an IFN_GOMP_SIMD_LANE call through
   a chain of assignments.  On the way, constant addends are folded into
   BASE and one constant step is extracted; the step is finally stored
   in ARGINFO.
   NOTE(review): the return type, braces, case labels, early exits and
   the tail of the final condition are on lines missing from this copy,
   so the comments on the switch arms describe only the visible
   statements.  */
3084 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3085 struct simd_call_arg_info
*arginfo
)
3087 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* OP's definition is tested for not being a POINTER_PLUS_EXPR
   assignment whose first operand is invariant.  */
3089 if (!is_gimple_assign (def_stmt
)
3090 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3091 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3094 tree base
= gimple_assign_rhs1 (def_stmt
);
3095 HOST_WIDE_INT linear_step
= 0;
3096 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Follow the definition chain of the offset V for as long as it is an
   SSA_NAME.  */
3097 while (TREE_CODE (v
) == SSA_NAME
)
3100 def_stmt
= SSA_NAME_DEF_STMT (v
);
3101 if (is_gimple_assign (def_stmt
))
3102 switch (gimple_assign_rhs_code (def_stmt
))
/* Arm 1 (label not visible): the second operand T is tested for a step
   already being recorded or for not being an INTEGER_CST; T is folded
   into BASE and the walk continues with the first operand.  */
3105 t
= gimple_assign_rhs2 (def_stmt
);
3106 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3108 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3109 v
= gimple_assign_rhs1 (def_stmt
);
/* Arm 2 (label not visible): the second operand T becomes the step.
   The test checks for a step already recorded, or T not fitting a
   signed HOST_WIDE_INT, or T being zero.  */
3112 t
= gimple_assign_rhs2 (def_stmt
);
3113 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3115 linear_step
= tree_to_shwi (t
);
3116 v
= gimple_assign_rhs1 (def_stmt
);
/* Arm 3 (label not visible): inspects the source operand T; the test
   checks for T's type not being an INTEGER_TYPE or being wider than
   V's type.  */
3119 t
= gimple_assign_rhs1 (def_stmt
);
3120 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3121 || (TYPE_PRECISION (TREE_TYPE (v
))
3122 < TYPE_PRECISION (TREE_TYPE (t
))))
/* The chain may end in an IFN_GOMP_SIMD_LANE call whose argument is an
   SSA_NAME; what its SSA_NAME_VAR is compared against is not visible in
   this copy.  */
3131 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3133 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3134 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Record the extracted step and flag the argument as linear within the
   simd lane.  */
3139 arginfo
->linear_step
= linear_step
;
3141 arginfo
->simd_lane_linear
= true;
3147 /* Function vectorizable_simd_clone_call.
3149 Check if STMT performs a function call that can be vectorized
3150 by calling a simd clone of the function.
3151 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3152 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3153 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3156 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3157 gimple
**vec_stmt
, slp_tree slp_node
)
3162 tree vec_oprnd0
= NULL_TREE
;
3163 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3165 unsigned int nunits
;
3166 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3167 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3168 vec_info
*vinfo
= stmt_info
->vinfo
;
3169 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3170 tree fndecl
, new_temp
;
3172 gimple
*new_stmt
= NULL
;
3174 auto_vec
<simd_call_arg_info
> arginfo
;
3175 vec
<tree
> vargs
= vNULL
;
3177 tree lhs
, rtype
, ratype
;
3178 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
3180 /* Is STMT a vectorizable call? */
3181 if (!is_gimple_call (stmt
))
3184 fndecl
= gimple_call_fndecl (stmt
);
3185 if (fndecl
== NULL_TREE
)
3188 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3189 if (node
== NULL
|| node
->simd_clones
== NULL
)
3192 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3195 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3199 if (gimple_call_lhs (stmt
)
3200 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3203 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3205 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3207 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3214 /* Process function arguments. */
3215 nargs
= gimple_call_num_args (stmt
);
3217 /* Bail out if the function has zero arguments. */
3221 arginfo
.reserve (nargs
, true);
3223 for (i
= 0; i
< nargs
; i
++)
3225 simd_call_arg_info thisarginfo
;
3228 thisarginfo
.linear_step
= 0;
3229 thisarginfo
.align
= 0;
3230 thisarginfo
.op
= NULL_TREE
;
3231 thisarginfo
.simd_lane_linear
= false;
3233 op
= gimple_call_arg (stmt
, i
);
3234 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3235 &thisarginfo
.vectype
)
3236 || thisarginfo
.dt
== vect_uninitialized_def
)
3238 if (dump_enabled_p ())
3239 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3240 "use not simple.\n");
3244 if (thisarginfo
.dt
== vect_constant_def
3245 || thisarginfo
.dt
== vect_external_def
)
3246 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3248 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3250 /* For linear arguments, the analyze phase should have saved
3251 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3252 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3253 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3255 gcc_assert (vec_stmt
);
3256 thisarginfo
.linear_step
3257 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3259 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3260 thisarginfo
.simd_lane_linear
3261 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3262 == boolean_true_node
);
3263 /* If loop has been peeled for alignment, we need to adjust it. */
3264 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3265 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3266 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3268 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3269 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3270 tree opt
= TREE_TYPE (thisarginfo
.op
);
3271 bias
= fold_convert (TREE_TYPE (step
), bias
);
3272 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3274 = fold_build2 (POINTER_TYPE_P (opt
)
3275 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3276 thisarginfo
.op
, bias
);
3280 && thisarginfo
.dt
!= vect_constant_def
3281 && thisarginfo
.dt
!= vect_external_def
3283 && TREE_CODE (op
) == SSA_NAME
3284 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3286 && tree_fits_shwi_p (iv
.step
))
3288 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3289 thisarginfo
.op
= iv
.base
;
3291 else if ((thisarginfo
.dt
== vect_constant_def
3292 || thisarginfo
.dt
== vect_external_def
)
3293 && POINTER_TYPE_P (TREE_TYPE (op
)))
3294 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3295 /* Addresses of array elements indexed by GOMP_SIMD_LANE are linear too. */
3297 if (POINTER_TYPE_P (TREE_TYPE (op
))
3298 && !thisarginfo
.linear_step
3300 && thisarginfo
.dt
!= vect_constant_def
3301 && thisarginfo
.dt
!= vect_external_def
3304 && TREE_CODE (op
) == SSA_NAME
)
3305 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3307 arginfo
.quick_push (thisarginfo
);
3310 unsigned int badness
= 0;
3311 struct cgraph_node
*bestn
= NULL
;
3312 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3313 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3315 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3316 n
= n
->simdclone
->next_clone
)
3318 unsigned int this_badness
= 0;
3319 if (n
->simdclone
->simdlen
3320 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3321 || n
->simdclone
->nargs
!= nargs
)
3323 if (n
->simdclone
->simdlen
3324 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3325 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3326 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3327 if (n
->simdclone
->inbranch
)
3328 this_badness
+= 2048;
3329 int target_badness
= targetm
.simd_clone
.usable (n
);
3330 if (target_badness
< 0)
3332 this_badness
+= target_badness
* 512;
3333 /* FORNOW: Have to add code to add the mask argument. */
3334 if (n
->simdclone
->inbranch
)
3336 for (i
= 0; i
< nargs
; i
++)
3338 switch (n
->simdclone
->args
[i
].arg_type
)
3340 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3341 if (!useless_type_conversion_p
3342 (n
->simdclone
->args
[i
].orig_type
,
3343 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3345 else if (arginfo
[i
].dt
== vect_constant_def
3346 || arginfo
[i
].dt
== vect_external_def
3347 || arginfo
[i
].linear_step
)
3350 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3351 if (arginfo
[i
].dt
!= vect_constant_def
3352 && arginfo
[i
].dt
!= vect_external_def
)
3355 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3356 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3357 if (arginfo
[i
].dt
== vect_constant_def
3358 || arginfo
[i
].dt
== vect_external_def
3359 || (arginfo
[i
].linear_step
3360 != n
->simdclone
->args
[i
].linear_step
))
3363 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3364 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3365 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3366 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3367 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3368 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3372 case SIMD_CLONE_ARG_TYPE_MASK
:
3375 if (i
== (size_t) -1)
3377 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3382 if (arginfo
[i
].align
)
3383 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3384 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3386 if (i
== (size_t) -1)
3388 if (bestn
== NULL
|| this_badness
< badness
)
3391 badness
= this_badness
;
3398 for (i
= 0; i
< nargs
; i
++)
3399 if ((arginfo
[i
].dt
== vect_constant_def
3400 || arginfo
[i
].dt
== vect_external_def
)
3401 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3404 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3406 if (arginfo
[i
].vectype
== NULL
3407 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3408 > bestn
->simdclone
->simdlen
))
3412 fndecl
= bestn
->decl
;
3413 nunits
= bestn
->simdclone
->simdlen
;
3414 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3416 /* If the function isn't const, only allow it in simd loops where user
3417 has asserted that at least nunits consecutive iterations can be
3418 performed using SIMD instructions. */
3419 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3420 && gimple_vuse (stmt
))
3423 /* Sanity check: make sure that at least one copy of the vectorized stmt
3424 needs to be generated. */
3425 gcc_assert (ncopies
>= 1);
3427 if (!vec_stmt
) /* transformation not required. */
3429 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3430 for (i
= 0; i
< nargs
; i
++)
3431 if ((bestn
->simdclone
->args
[i
].arg_type
3432 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3433 || (bestn
->simdclone
->args
[i
].arg_type
3434 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3436 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3438 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3439 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3440 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3441 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3442 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3443 tree sll
= arginfo
[i
].simd_lane_linear
3444 ? boolean_true_node
: boolean_false_node
;
3445 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3447 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3448 if (dump_enabled_p ())
3449 dump_printf_loc (MSG_NOTE
, vect_location
,
3450 "=== vectorizable_simd_clone_call ===\n");
3451 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3457 if (dump_enabled_p ())
3458 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3461 scalar_dest
= gimple_call_lhs (stmt
);
3462 vec_dest
= NULL_TREE
;
3467 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3468 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3469 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3472 rtype
= TREE_TYPE (ratype
);
3476 prev_stmt_info
= NULL
;
3477 for (j
= 0; j
< ncopies
; ++j
)
3479 /* Build argument list for the vectorized call. */
3481 vargs
.create (nargs
);
3485 for (i
= 0; i
< nargs
; i
++)
3487 unsigned int k
, l
, m
, o
;
3489 op
= gimple_call_arg (stmt
, i
);
3490 switch (bestn
->simdclone
->args
[i
].arg_type
)
3492 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3493 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3494 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3495 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3497 if (TYPE_VECTOR_SUBPARTS (atype
)
3498 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3500 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3501 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3502 / TYPE_VECTOR_SUBPARTS (atype
));
3503 gcc_assert ((k
& (k
- 1)) == 0);
3506 = vect_get_vec_def_for_operand (op
, stmt
);
3509 vec_oprnd0
= arginfo
[i
].op
;
3510 if ((m
& (k
- 1)) == 0)
3512 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3515 arginfo
[i
].op
= vec_oprnd0
;
3517 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3519 bitsize_int ((m
& (k
- 1)) * prec
));
3521 = gimple_build_assign (make_ssa_name (atype
),
3523 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3524 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3528 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3529 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3530 gcc_assert ((k
& (k
- 1)) == 0);
3531 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3533 vec_alloc (ctor_elts
, k
);
3536 for (l
= 0; l
< k
; l
++)
3538 if (m
== 0 && l
== 0)
3540 = vect_get_vec_def_for_operand (op
, stmt
);
3543 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3545 arginfo
[i
].op
= vec_oprnd0
;
3548 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3552 vargs
.safe_push (vec_oprnd0
);
3555 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3557 = gimple_build_assign (make_ssa_name (atype
),
3559 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3560 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3565 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3566 vargs
.safe_push (op
);
3568 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3569 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3574 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3579 edge pe
= loop_preheader_edge (loop
);
3580 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3581 gcc_assert (!new_bb
);
3583 if (arginfo
[i
].simd_lane_linear
)
3585 vargs
.safe_push (arginfo
[i
].op
);
3588 tree phi_res
= copy_ssa_name (op
);
3589 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3590 set_vinfo_for_stmt (new_phi
,
3591 new_stmt_vec_info (new_phi
, loop_vinfo
));
3592 add_phi_arg (new_phi
, arginfo
[i
].op
,
3593 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3595 = POINTER_TYPE_P (TREE_TYPE (op
))
3596 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3597 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3598 ? sizetype
: TREE_TYPE (op
);
3600 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3602 tree tcst
= wide_int_to_tree (type
, cst
);
3603 tree phi_arg
= copy_ssa_name (op
);
3605 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3606 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3607 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3608 set_vinfo_for_stmt (new_stmt
,
3609 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3610 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3612 arginfo
[i
].op
= phi_res
;
3613 vargs
.safe_push (phi_res
);
3618 = POINTER_TYPE_P (TREE_TYPE (op
))
3619 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3620 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3621 ? sizetype
: TREE_TYPE (op
);
3623 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3625 tree tcst
= wide_int_to_tree (type
, cst
);
3626 new_temp
= make_ssa_name (TREE_TYPE (op
));
3627 new_stmt
= gimple_build_assign (new_temp
, code
,
3628 arginfo
[i
].op
, tcst
);
3629 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3630 vargs
.safe_push (new_temp
);
3633 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3634 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3635 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3636 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3637 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3638 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3644 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3647 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3649 new_temp
= create_tmp_var (ratype
);
3650 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3651 == TYPE_VECTOR_SUBPARTS (rtype
))
3652 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3654 new_temp
= make_ssa_name (rtype
, new_stmt
);
3655 gimple_call_set_lhs (new_stmt
, new_temp
);
3657 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3661 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3664 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3665 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3666 gcc_assert ((k
& (k
- 1)) == 0);
3667 for (l
= 0; l
< k
; l
++)
3672 t
= build_fold_addr_expr (new_temp
);
3673 t
= build2 (MEM_REF
, vectype
, t
,
3674 build_int_cst (TREE_TYPE (t
),
3675 l
* prec
/ BITS_PER_UNIT
));
3678 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3679 size_int (prec
), bitsize_int (l
* prec
));
3681 = gimple_build_assign (make_ssa_name (vectype
), t
);
3682 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3683 if (j
== 0 && l
== 0)
3684 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3686 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3688 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3693 tree clobber
= build_constructor (ratype
, NULL
);
3694 TREE_THIS_VOLATILE (clobber
) = 1;
3695 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3696 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3700 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3702 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3703 / TYPE_VECTOR_SUBPARTS (rtype
));
3704 gcc_assert ((k
& (k
- 1)) == 0);
3705 if ((j
& (k
- 1)) == 0)
3706 vec_alloc (ret_ctor_elts
, k
);
3709 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3710 for (m
= 0; m
< o
; m
++)
3712 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3713 size_int (m
), NULL_TREE
, NULL_TREE
);
3715 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3716 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3717 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3718 gimple_assign_lhs (new_stmt
));
3720 tree clobber
= build_constructor (ratype
, NULL
);
3721 TREE_THIS_VOLATILE (clobber
) = 1;
3722 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3723 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3726 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3727 if ((j
& (k
- 1)) != k
- 1)
3729 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3731 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3732 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3734 if ((unsigned) j
== k
- 1)
3735 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3737 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3739 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3744 tree t
= build_fold_addr_expr (new_temp
);
3745 t
= build2 (MEM_REF
, vectype
, t
,
3746 build_int_cst (TREE_TYPE (t
), 0));
3748 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3749 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3750 tree clobber
= build_constructor (ratype
, NULL
);
3751 TREE_THIS_VOLATILE (clobber
) = 1;
3752 vect_finish_stmt_generation (stmt
,
3753 gimple_build_assign (new_temp
,
3759 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3761 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3763 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3768 /* The call in STMT might prevent it from being removed in dce.
3769 We however cannot remove it here, due to the way the ssa name
3770 it defines is mapped to the new definition. So just replace
3771 rhs of the statement with something harmless. */
3778 type
= TREE_TYPE (scalar_dest
);
3779 if (is_pattern_stmt_p (stmt_info
))
3780 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3782 lhs
= gimple_call_lhs (stmt
);
3783 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3786 new_stmt
= gimple_build_nop ();
3787 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3788 set_vinfo_for_stmt (stmt
, NULL
);
3789 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3790 gsi_replace (gsi
, new_stmt
, true);
3791 unlink_stmt_vdef (stmt
);
3797 /* Function vect_gen_widened_results_half
3799 Create a vector stmt whose code, type, number of arguments, and result
3800 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3801 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3802 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3803 needs to be created (DECL is a function-decl of a target-builtin).
3804 STMT is the original scalar stmt that we are vectorizing. */
3807 vect_gen_widened_results_half (enum tree_code code
,
3809 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3810 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3816 /* Generate half of the widened result: */
3817 if (code
== CALL_EXPR
)
3819 /* Target specific support */
3820 if (op_type
== binary_op
)
3821 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3823 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3824 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3825 gimple_call_set_lhs (new_stmt
, new_temp
);
3829 /* Generic support */
3830 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3831 if (op_type
!= binary_op
)
3833 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3834 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3835 gimple_assign_set_lhs (new_stmt
, new_temp
);
3837 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3843 /* Get vectorized definitions for loop-based vectorization. For the first
3844 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3845 scalar operand), and for the rest we get a copy with
3846 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3847 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3848 The vectors are collected into VEC_OPRNDS. */
3851 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3852 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3856 /* Get first vector operand. */
3857 /* All the vector operands except the very first one (that is scalar oprnd)
3859 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3860 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3862 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3864 vec_oprnds
->quick_push (vec_oprnd
);
3866 /* Get second vector operand. */
3867 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3868 vec_oprnds
->quick_push (vec_oprnd
);
3872 /* For conversion in multiple steps, continue to get operands
3875 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3879 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3880 For multi-step conversions store the resulting vectors and call the function
3884 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3885 int multi_step_cvt
, gimple
*stmt
,
3887 gimple_stmt_iterator
*gsi
,
3888 slp_tree slp_node
, enum tree_code code
,
3889 stmt_vec_info
*prev_stmt_info
)
3892 tree vop0
, vop1
, new_tmp
, vec_dest
;
3894 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3896 vec_dest
= vec_dsts
.pop ();
3898 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3900 /* Create demotion operation. */
3901 vop0
= (*vec_oprnds
)[i
];
3902 vop1
= (*vec_oprnds
)[i
+ 1];
3903 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3904 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3905 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3906 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3909 /* Store the resulting vector for next recursive call. */
3910 (*vec_oprnds
)[i
/2] = new_tmp
;
3913 /* This is the last step of the conversion sequence. Store the
3914 vectors in SLP_NODE or in vector info of the scalar statement
3915 (or in STMT_VINFO_RELATED_STMT chain). */
3917 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3920 if (!*prev_stmt_info
)
3921 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3923 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3925 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3930 /* For multi-step demotion operations we first generate demotion operations
3931 from the source type to the intermediate types, and then combine the
3932 results (stored in VEC_OPRNDS) in demotion operation to the destination
3936 /* At each level of recursion we have half of the operands we had at the
3938 vec_oprnds
->truncate ((i
+1)/2);
3939 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3940 stmt
, vec_dsts
, gsi
, slp_node
,
3941 VEC_PACK_TRUNC_EXPR
,
3945 vec_dsts
.quick_push (vec_dest
);
3949 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3950 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3951 the resulting vectors and call the function recursively. */
3954 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3955 vec
<tree
> *vec_oprnds1
,
3956 gimple
*stmt
, tree vec_dest
,
3957 gimple_stmt_iterator
*gsi
,
3958 enum tree_code code1
,
3959 enum tree_code code2
, tree decl1
,
3960 tree decl2
, int op_type
)
3963 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3964 gimple
*new_stmt1
, *new_stmt2
;
3965 vec
<tree
> vec_tmp
= vNULL
;
3967 vec_tmp
.create (vec_oprnds0
->length () * 2);
3968 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3970 if (op_type
== binary_op
)
3971 vop1
= (*vec_oprnds1
)[i
];
3975 /* Generate the two halves of promotion operation. */
3976 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3977 op_type
, vec_dest
, gsi
, stmt
);
3978 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3979 op_type
, vec_dest
, gsi
, stmt
);
3980 if (is_gimple_call (new_stmt1
))
3982 new_tmp1
= gimple_call_lhs (new_stmt1
);
3983 new_tmp2
= gimple_call_lhs (new_stmt2
);
3987 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3988 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3991 /* Store the results for the next step. */
3992 vec_tmp
.quick_push (new_tmp1
);
3993 vec_tmp
.quick_push (new_tmp2
);
3996 vec_oprnds0
->release ();
3997 *vec_oprnds0
= vec_tmp
;
4001 /* Check if STMT performs a conversion operation, that can be vectorized.
4002 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4003 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4004 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4007 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4008 gimple
**vec_stmt
, slp_tree slp_node
)
4012 tree op0
, op1
= NULL_TREE
;
4013 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4014 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4015 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4016 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4017 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4018 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4021 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4022 gimple
*new_stmt
= NULL
;
4023 stmt_vec_info prev_stmt_info
;
4026 tree vectype_out
, vectype_in
;
4028 tree lhs_type
, rhs_type
;
4029 enum { NARROW
, NONE
, WIDEN
} modifier
;
4030 vec
<tree
> vec_oprnds0
= vNULL
;
4031 vec
<tree
> vec_oprnds1
= vNULL
;
4033 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4034 vec_info
*vinfo
= stmt_info
->vinfo
;
4035 int multi_step_cvt
= 0;
4036 vec
<tree
> interm_types
= vNULL
;
4037 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4039 machine_mode rhs_mode
;
4040 unsigned short fltsz
;
4042 /* Is STMT a vectorizable conversion? */
4044 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4047 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4051 if (!is_gimple_assign (stmt
))
4054 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4057 code
= gimple_assign_rhs_code (stmt
);
4058 if (!CONVERT_EXPR_CODE_P (code
)
4059 && code
!= FIX_TRUNC_EXPR
4060 && code
!= FLOAT_EXPR
4061 && code
!= WIDEN_MULT_EXPR
4062 && code
!= WIDEN_LSHIFT_EXPR
)
4065 op_type
= TREE_CODE_LENGTH (code
);
4067 /* Check types of lhs and rhs. */
4068 scalar_dest
= gimple_assign_lhs (stmt
);
4069 lhs_type
= TREE_TYPE (scalar_dest
);
4070 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4072 op0
= gimple_assign_rhs1 (stmt
);
4073 rhs_type
= TREE_TYPE (op0
);
4075 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4076 && !((INTEGRAL_TYPE_P (lhs_type
)
4077 && INTEGRAL_TYPE_P (rhs_type
))
4078 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4079 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4082 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4083 && ((INTEGRAL_TYPE_P (lhs_type
)
4084 && (TYPE_PRECISION (lhs_type
)
4085 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
4086 || (INTEGRAL_TYPE_P (rhs_type
)
4087 && (TYPE_PRECISION (rhs_type
)
4088 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
4090 if (dump_enabled_p ())
4091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4092 "type conversion to/from bit-precision unsupported."
4097 /* Check the operands of the operation. */
4098 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4100 if (dump_enabled_p ())
4101 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4102 "use not simple.\n");
4105 if (op_type
== binary_op
)
4109 op1
= gimple_assign_rhs2 (stmt
);
4110 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4111 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4113 if (CONSTANT_CLASS_P (op0
))
4114 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4116 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4120 if (dump_enabled_p ())
4121 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4122 "use not simple.\n");
4127 /* If op0 is an external or constant defs use a vector type of
4128 the same size as the output vector type. */
4130 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4132 gcc_assert (vectype_in
);
4135 if (dump_enabled_p ())
4137 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4138 "no vectype for scalar type ");
4139 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4140 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4146 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4147 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4149 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4152 "can't convert between boolean and non "
4154 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4155 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4161 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4162 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4163 if (nunits_in
< nunits_out
)
4165 else if (nunits_out
== nunits_in
)
4170 /* Multiple types in SLP are handled by creating the appropriate number of
4171 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4175 else if (modifier
== NARROW
)
4176 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
4178 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4180 /* Sanity check: make sure that at least one copy of the vectorized stmt
4181 needs to be generated. */
4182 gcc_assert (ncopies
>= 1);
4184 /* Supportable by target? */
4188 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4190 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4195 if (dump_enabled_p ())
4196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4197 "conversion not supported by target.\n");
4201 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4202 &code1
, &code2
, &multi_step_cvt
,
4205 /* Binary widening operation can only be supported directly by the
4207 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4211 if (code
!= FLOAT_EXPR
4212 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4213 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4216 rhs_mode
= TYPE_MODE (rhs_type
);
4217 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
4218 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
4219 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
4220 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
4223 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4224 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4225 if (cvt_type
== NULL_TREE
)
4228 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4230 if (!supportable_convert_operation (code
, vectype_out
,
4231 cvt_type
, &decl1
, &codecvt1
))
4234 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4235 cvt_type
, &codecvt1
,
4236 &codecvt2
, &multi_step_cvt
,
4240 gcc_assert (multi_step_cvt
== 0);
4242 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4243 vectype_in
, &code1
, &code2
,
4244 &multi_step_cvt
, &interm_types
))
4248 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
4251 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4252 codecvt2
= ERROR_MARK
;
4256 interm_types
.safe_push (cvt_type
);
4257 cvt_type
= NULL_TREE
;
4262 gcc_assert (op_type
== unary_op
);
4263 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4264 &code1
, &multi_step_cvt
,
4268 if (code
!= FIX_TRUNC_EXPR
4269 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4270 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4273 rhs_mode
= TYPE_MODE (rhs_type
);
4275 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4276 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4277 if (cvt_type
== NULL_TREE
)
4279 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4282 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4283 &code1
, &multi_step_cvt
,
4292 if (!vec_stmt
) /* transformation not required. */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE
, vect_location
,
4296 "=== vectorizable_conversion ===\n");
4297 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4299 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4300 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4302 else if (modifier
== NARROW
)
4304 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4305 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4309 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4310 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4312 interm_types
.release ();
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_NOTE
, vect_location
,
4319 "transform conversion. ncopies = %d.\n", ncopies
);
4321 if (op_type
== binary_op
)
4323 if (CONSTANT_CLASS_P (op0
))
4324 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4325 else if (CONSTANT_CLASS_P (op1
))
4326 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4329 /* In case of multi-step conversion, we first generate conversion operations
4330 to the intermediate types, and then from that types to the final one.
4331 We create vector destinations for the intermediate type (TYPES) received
4332 from supportable_*_operation, and store them in the correct order
4333 for future use in vect_create_vectorized_*_stmts (). */
4334 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4335 vec_dest
= vect_create_destination_var (scalar_dest
,
4336 (cvt_type
&& modifier
== WIDEN
)
4337 ? cvt_type
: vectype_out
);
4338 vec_dsts
.quick_push (vec_dest
);
4342 for (i
= interm_types
.length () - 1;
4343 interm_types
.iterate (i
, &intermediate_type
); i
--)
4345 vec_dest
= vect_create_destination_var (scalar_dest
,
4347 vec_dsts
.quick_push (vec_dest
);
4352 vec_dest
= vect_create_destination_var (scalar_dest
,
4354 ? vectype_out
: cvt_type
);
4358 if (modifier
== WIDEN
)
4360 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4361 if (op_type
== binary_op
)
4362 vec_oprnds1
.create (1);
4364 else if (modifier
== NARROW
)
4365 vec_oprnds0
.create (
4366 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4368 else if (code
== WIDEN_LSHIFT_EXPR
)
4369 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4372 prev_stmt_info
= NULL
;
4376 for (j
= 0; j
< ncopies
; j
++)
4379 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
4382 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4384 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4386 /* Arguments are ready, create the new vector stmt. */
4387 if (code1
== CALL_EXPR
)
4389 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4390 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4391 gimple_call_set_lhs (new_stmt
, new_temp
);
4395 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4396 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4397 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4398 gimple_assign_set_lhs (new_stmt
, new_temp
);
4401 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4403 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4406 if (!prev_stmt_info
)
4407 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4409 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4410 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4417 /* In case the vectorization factor (VF) is bigger than the number
4418 of elements that we can fit in a vectype (nunits), we have to
4419 generate more than one vector stmt - i.e - we need to "unroll"
4420 the vector stmt by a factor VF/nunits. */
4421 for (j
= 0; j
< ncopies
; j
++)
4428 if (code
== WIDEN_LSHIFT_EXPR
)
4433 /* Store vec_oprnd1 for every vector stmt to be created
4434 for SLP_NODE. We check during the analysis that all
4435 the shift arguments are the same. */
4436 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4437 vec_oprnds1
.quick_push (vec_oprnd1
);
4439 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4443 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4444 &vec_oprnds1
, slp_node
, -1);
4448 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4449 vec_oprnds0
.quick_push (vec_oprnd0
);
4450 if (op_type
== binary_op
)
4452 if (code
== WIDEN_LSHIFT_EXPR
)
4455 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4456 vec_oprnds1
.quick_push (vec_oprnd1
);
4462 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4463 vec_oprnds0
.truncate (0);
4464 vec_oprnds0
.quick_push (vec_oprnd0
);
4465 if (op_type
== binary_op
)
4467 if (code
== WIDEN_LSHIFT_EXPR
)
4470 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4472 vec_oprnds1
.truncate (0);
4473 vec_oprnds1
.quick_push (vec_oprnd1
);
4477 /* Arguments are ready. Create the new vector stmts. */
4478 for (i
= multi_step_cvt
; i
>= 0; i
--)
4480 tree this_dest
= vec_dsts
[i
];
4481 enum tree_code c1
= code1
, c2
= code2
;
4482 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4487 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4489 stmt
, this_dest
, gsi
,
4490 c1
, c2
, decl1
, decl2
,
4494 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4498 if (codecvt1
== CALL_EXPR
)
4500 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4501 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4502 gimple_call_set_lhs (new_stmt
, new_temp
);
4506 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4507 new_temp
= make_ssa_name (vec_dest
);
4508 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4512 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4515 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4518 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4521 if (!prev_stmt_info
)
4522 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4524 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4525 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4530 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4534 /* In case the vectorization factor (VF) is bigger than the number
4535 of elements that we can fit in a vectype (nunits), we have to
4536 generate more than one vector stmt - i.e - we need to "unroll"
4537 the vector stmt by a factor VF/nunits. */
4538 for (j
= 0; j
< ncopies
; j
++)
4542 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4546 vec_oprnds0
.truncate (0);
4547 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4548 vect_pow2 (multi_step_cvt
) - 1);
4551 /* Arguments are ready. Create the new vector stmts. */
4553 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4555 if (codecvt1
== CALL_EXPR
)
4557 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4558 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4559 gimple_call_set_lhs (new_stmt
, new_temp
);
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4564 new_temp
= make_ssa_name (vec_dest
);
4565 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4570 vec_oprnds0
[i
] = new_temp
;
4573 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4574 stmt
, vec_dsts
, gsi
,
4579 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4583 vec_oprnds0
.release ();
4584 vec_oprnds1
.release ();
4585 interm_types
.release ();
4591 /* Function vectorizable_assignment.
4593 Check if STMT performs an assignment (copy) that can be vectorized.
4594 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4595 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4596 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4599 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4600 gimple
**vec_stmt
, slp_tree slp_node
)
4605 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4606 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4609 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4612 vec
<tree
> vec_oprnds
= vNULL
;
4614 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4615 vec_info
*vinfo
= stmt_info
->vinfo
;
4616 gimple
*new_stmt
= NULL
;
4617 stmt_vec_info prev_stmt_info
= NULL
;
4618 enum tree_code code
;
4621 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4624 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4628 /* Is vectorizable assignment? */
4629 if (!is_gimple_assign (stmt
))
4632 scalar_dest
= gimple_assign_lhs (stmt
);
4633 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4636 code
= gimple_assign_rhs_code (stmt
);
4637 if (gimple_assign_single_p (stmt
)
4638 || code
== PAREN_EXPR
4639 || CONVERT_EXPR_CODE_P (code
))
4640 op
= gimple_assign_rhs1 (stmt
);
4644 if (code
== VIEW_CONVERT_EXPR
)
4645 op
= TREE_OPERAND (op
, 0);
4647 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4648 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4650 /* Multiple types in SLP are handled by creating the appropriate number of
4651 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4656 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4658 gcc_assert (ncopies
>= 1);
4660 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4664 "use not simple.\n");
4668 /* We can handle NOP_EXPR conversions that do not change the number
4669 of elements or the vector size. */
4670 if ((CONVERT_EXPR_CODE_P (code
)
4671 || code
== VIEW_CONVERT_EXPR
)
4673 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4674 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4675 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4678 /* We do not handle bit-precision changes. */
4679 if ((CONVERT_EXPR_CODE_P (code
)
4680 || code
== VIEW_CONVERT_EXPR
)
4681 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4682 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4683 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4684 || ((TYPE_PRECISION (TREE_TYPE (op
))
4685 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4686 /* But a conversion that does not change the bit-pattern is ok. */
4687 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4688 > TYPE_PRECISION (TREE_TYPE (op
)))
4689 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4690 /* Conversion between boolean types of different sizes is
4691 a simple assignment in case their vectypes are same
4693 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4694 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4696 if (dump_enabled_p ())
4697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4698 "type conversion to/from bit-precision "
4703 if (!vec_stmt
) /* transformation not required. */
4705 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4706 if (dump_enabled_p ())
4707 dump_printf_loc (MSG_NOTE
, vect_location
,
4708 "=== vectorizable_assignment ===\n");
4709 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4714 if (dump_enabled_p ())
4715 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4718 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4721 for (j
= 0; j
< ncopies
; j
++)
4725 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4727 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4729 /* Arguments are ready. create the new vector stmt. */
4730 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4732 if (CONVERT_EXPR_CODE_P (code
)
4733 || code
== VIEW_CONVERT_EXPR
)
4734 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4735 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4736 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4737 gimple_assign_set_lhs (new_stmt
, new_temp
);
4738 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4740 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4747 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4749 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4751 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4754 vec_oprnds
.release ();
4759 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4760 either as shift by a scalar or by a vector. */
4763 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4766 machine_mode vec_mode
;
4771 vectype
= get_vectype_for_scalar_type (scalar_type
);
4775 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4777 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4779 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4781 || (optab_handler (optab
, TYPE_MODE (vectype
))
4782 == CODE_FOR_nothing
))
4786 vec_mode
= TYPE_MODE (vectype
);
4787 icode
= (int) optab_handler (optab
, vec_mode
);
4788 if (icode
== CODE_FOR_nothing
)
4795 /* Function vectorizable_shift.
4797 Check if STMT performs a shift operation that can be vectorized.
4798 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4799 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4800 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4803 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4804 gimple
**vec_stmt
, slp_tree slp_node
)
4808 tree op0
, op1
= NULL
;
4809 tree vec_oprnd1
= NULL_TREE
;
4810 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4812 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4813 enum tree_code code
;
4814 machine_mode vec_mode
;
4818 machine_mode optab_op2_mode
;
4820 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4821 gimple
*new_stmt
= NULL
;
4822 stmt_vec_info prev_stmt_info
;
4829 vec
<tree
> vec_oprnds0
= vNULL
;
4830 vec
<tree
> vec_oprnds1
= vNULL
;
4833 bool scalar_shift_arg
= true;
4834 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4835 vec_info
*vinfo
= stmt_info
->vinfo
;
4838 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4841 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4845 /* Is STMT a vectorizable binary/unary operation? */
4846 if (!is_gimple_assign (stmt
))
4849 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4852 code
= gimple_assign_rhs_code (stmt
);
4854 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4855 || code
== RROTATE_EXPR
))
4858 scalar_dest
= gimple_assign_lhs (stmt
);
4859 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4860 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4861 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4863 if (dump_enabled_p ())
4864 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4865 "bit-precision shifts not supported.\n");
4869 op0
= gimple_assign_rhs1 (stmt
);
4870 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4874 "use not simple.\n");
4877 /* If op0 is an external or constant def use a vector type with
4878 the same size as the output vector type. */
4880 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4882 gcc_assert (vectype
);
4885 if (dump_enabled_p ())
4886 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4887 "no vectype for scalar type\n");
4891 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4892 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4893 if (nunits_out
!= nunits_in
)
4896 op1
= gimple_assign_rhs2 (stmt
);
4897 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4901 "use not simple.\n");
4906 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4910 /* Multiple types in SLP are handled by creating the appropriate number of
4911 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4916 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4918 gcc_assert (ncopies
>= 1);
4920 /* Determine whether the shift amount is a vector, or scalar. If the
4921 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4923 if ((dt
[1] == vect_internal_def
4924 || dt
[1] == vect_induction_def
)
4926 scalar_shift_arg
= false;
4927 else if (dt
[1] == vect_constant_def
4928 || dt
[1] == vect_external_def
4929 || dt
[1] == vect_internal_def
)
4931 /* In SLP, need to check whether the shift count is the same,
4932 in loops if it is a constant or invariant, it is always
4936 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4939 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4940 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4941 scalar_shift_arg
= false;
4944 /* If the shift amount is computed by a pattern stmt we cannot
4945 use the scalar amount directly thus give up and use a vector
4947 if (dt
[1] == vect_internal_def
)
4949 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4950 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4951 scalar_shift_arg
= false;
4956 if (dump_enabled_p ())
4957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4958 "operand mode requires invariant argument.\n");
4962 /* Vector shifted by vector. */
4963 if (!scalar_shift_arg
)
4965 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4966 if (dump_enabled_p ())
4967 dump_printf_loc (MSG_NOTE
, vect_location
,
4968 "vector/vector shift/rotate found.\n");
4971 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4972 if (op1_vectype
== NULL_TREE
4973 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4975 if (dump_enabled_p ())
4976 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4977 "unusable type for last operand in"
4978 " vector/vector shift/rotate.\n");
4982 /* See if the machine has a vector shifted by scalar insn and if not
4983 then see if it has a vector shifted by vector insn. */
4986 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4988 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4990 if (dump_enabled_p ())
4991 dump_printf_loc (MSG_NOTE
, vect_location
,
4992 "vector/scalar shift/rotate found.\n");
4996 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4998 && (optab_handler (optab
, TYPE_MODE (vectype
))
4999 != CODE_FOR_nothing
))
5001 scalar_shift_arg
= false;
5003 if (dump_enabled_p ())
5004 dump_printf_loc (MSG_NOTE
, vect_location
,
5005 "vector/vector shift/rotate found.\n");
5007 /* Unlike the other binary operators, shifts/rotates have
5008 the rhs being int, instead of the same type as the lhs,
5009 so make sure the scalar is the right type if we are
5010 dealing with vectors of long long/long/short/char. */
5011 if (dt
[1] == vect_constant_def
)
5012 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5013 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5017 && TYPE_MODE (TREE_TYPE (vectype
))
5018 != TYPE_MODE (TREE_TYPE (op1
)))
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5022 "unusable type for last operand in"
5023 " vector/vector shift/rotate.\n");
5026 if (vec_stmt
&& !slp_node
)
5028 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5029 op1
= vect_init_vector (stmt
, op1
,
5030 TREE_TYPE (vectype
), NULL
);
5037 /* Supportable by target? */
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5045 vec_mode
= TYPE_MODE (vectype
);
5046 icode
= (int) optab_handler (optab
, vec_mode
);
5047 if (icode
== CODE_FOR_nothing
)
5049 if (dump_enabled_p ())
5050 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5051 "op not supported by target.\n");
5052 /* Check only during analysis. */
5053 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5054 || (vf
< vect_min_worthwhile_factor (code
)
5057 if (dump_enabled_p ())
5058 dump_printf_loc (MSG_NOTE
, vect_location
,
5059 "proceeding using word mode.\n");
5062 /* Worthwhile without SIMD support? Check only during analysis. */
5063 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
5064 && vf
< vect_min_worthwhile_factor (code
)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5069 "not worthwhile without SIMD support.\n");
5073 if (!vec_stmt
) /* transformation not required. */
5075 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5076 if (dump_enabled_p ())
5077 dump_printf_loc (MSG_NOTE
, vect_location
,
5078 "=== vectorizable_shift ===\n");
5079 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5085 if (dump_enabled_p ())
5086 dump_printf_loc (MSG_NOTE
, vect_location
,
5087 "transform binary/unary operation.\n");
5090 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5092 prev_stmt_info
= NULL
;
5093 for (j
= 0; j
< ncopies
; j
++)
5098 if (scalar_shift_arg
)
5100 /* Vector shl and shr insn patterns can be defined with scalar
5101 operand 2 (shift operand). In this case, use constant or loop
5102 invariant op1 directly, without extending it to vector mode
5104 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5105 if (!VECTOR_MODE_P (optab_op2_mode
))
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE
, vect_location
,
5109 "operand 1 using scalar mode.\n");
5111 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5112 vec_oprnds1
.quick_push (vec_oprnd1
);
5115 /* Store vec_oprnd1 for every vector stmt to be created
5116 for SLP_NODE. We check during the analysis that all
5117 the shift arguments are the same.
5118 TODO: Allow different constants for different vector
5119 stmts generated for an SLP instance. */
5120 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5121 vec_oprnds1
.quick_push (vec_oprnd1
);
5126 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5127 (a special case for certain kind of vector shifts); otherwise,
5128 operand 1 should be of a vector type (the usual case). */
5130 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5133 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5137 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5139 /* Arguments are ready. Create the new vector stmt. */
5140 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5142 vop1
= vec_oprnds1
[i
];
5143 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5144 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5145 gimple_assign_set_lhs (new_stmt
, new_temp
);
5146 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5148 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5155 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5157 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5158 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5161 vec_oprnds0
.release ();
5162 vec_oprnds1
.release ();
5168 /* Function vectorizable_operation.
5170 Check if STMT performs a binary, unary or ternary operation that can
5172 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5173 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5174 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5177 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5178 gimple
**vec_stmt
, slp_tree slp_node
)
5182 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5183 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5185 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5186 enum tree_code code
;
5187 machine_mode vec_mode
;
5191 bool target_support_p
;
5193 enum vect_def_type dt
[3]
5194 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5195 gimple
*new_stmt
= NULL
;
5196 stmt_vec_info prev_stmt_info
;
5202 vec
<tree
> vec_oprnds0
= vNULL
;
5203 vec
<tree
> vec_oprnds1
= vNULL
;
5204 vec
<tree
> vec_oprnds2
= vNULL
;
5205 tree vop0
, vop1
, vop2
;
5206 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5207 vec_info
*vinfo
= stmt_info
->vinfo
;
5210 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5213 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5217 /* Is STMT a vectorizable binary/unary operation? */
5218 if (!is_gimple_assign (stmt
))
5221 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5224 code
= gimple_assign_rhs_code (stmt
);
5226 /* For pointer addition, we should use the normal plus for
5227 the vector addition. */
5228 if (code
== POINTER_PLUS_EXPR
)
5231 /* Support only unary or binary operations. */
5232 op_type
= TREE_CODE_LENGTH (code
);
5233 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5235 if (dump_enabled_p ())
5236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5237 "num. args = %d (not unary/binary/ternary op).\n",
5242 scalar_dest
= gimple_assign_lhs (stmt
);
5243 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5245 /* Most operations cannot handle bit-precision types without extra
5247 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5248 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5249 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
5250 /* Exception are bitwise binary operations. */
5251 && code
!= BIT_IOR_EXPR
5252 && code
!= BIT_XOR_EXPR
5253 && code
!= BIT_AND_EXPR
)
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5257 "bit-precision arithmetic not supported.\n");
5261 op0
= gimple_assign_rhs1 (stmt
);
5262 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5264 if (dump_enabled_p ())
5265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5266 "use not simple.\n");
5269 /* If op0 is an external or constant def use a vector type with
5270 the same size as the output vector type. */
5273 /* For boolean type we cannot determine vectype by
5274 invariant value (don't know whether it is a vector
5275 of booleans or vector of integers). We use output
5276 vectype because operations on boolean don't change
5278 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5280 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5284 "not supported operation on bool value.\n");
5287 vectype
= vectype_out
;
5290 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5293 gcc_assert (vectype
);
5296 if (dump_enabled_p ())
5298 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5299 "no vectype for scalar type ");
5300 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5302 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5308 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5309 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5310 if (nunits_out
!= nunits_in
)
5313 if (op_type
== binary_op
|| op_type
== ternary_op
)
5315 op1
= gimple_assign_rhs2 (stmt
);
5316 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5320 "use not simple.\n");
5324 if (op_type
== ternary_op
)
5326 op2
= gimple_assign_rhs3 (stmt
);
5327 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5331 "use not simple.\n");
5337 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5341 /* Multiple types in SLP are handled by creating the appropriate number of
5342 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5347 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
5349 gcc_assert (ncopies
>= 1);
5351 /* Shifts are handled in vectorizable_shift (). */
5352 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5353 || code
== RROTATE_EXPR
)
5356 /* Supportable by target? */
5358 vec_mode
= TYPE_MODE (vectype
);
5359 if (code
== MULT_HIGHPART_EXPR
)
5360 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5363 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5366 if (dump_enabled_p ())
5367 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5371 target_support_p
= (optab_handler (optab
, vec_mode
)
5372 != CODE_FOR_nothing
);
5375 if (!target_support_p
)
5377 if (dump_enabled_p ())
5378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5379 "op not supported by target.\n");
5380 /* Check only during analysis. */
5381 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5382 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_NOTE
, vect_location
,
5386 "proceeding using word mode.\n");
5389 /* Worthwhile without SIMD support? Check only during analysis. */
5390 if (!VECTOR_MODE_P (vec_mode
)
5392 && vf
< vect_min_worthwhile_factor (code
))
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5396 "not worthwhile without SIMD support.\n");
5400 if (!vec_stmt
) /* transformation not required. */
5402 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE
, vect_location
,
5405 "=== vectorizable_operation ===\n");
5406 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_NOTE
, vect_location
,
5414 "transform binary/unary operation.\n");
5417 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5419 /* In case the vectorization factor (VF) is bigger than the number
5420 of elements that we can fit in a vectype (nunits), we have to generate
5421 more than one vector stmt - i.e - we need to "unroll" the
5422 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5423 from one copy of the vector stmt to the next, in the field
5424 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5425 stages to find the correct vector defs to be used when vectorizing
5426 stmts that use the defs of the current stmt. The example below
5427 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5428 we need to create 4 vectorized stmts):
5430 before vectorization:
5431 RELATED_STMT VEC_STMT
5435 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5437 RELATED_STMT VEC_STMT
5438 VS1_0: vx0 = memref0 VS1_1 -
5439 VS1_1: vx1 = memref1 VS1_2 -
5440 VS1_2: vx2 = memref2 VS1_3 -
5441 VS1_3: vx3 = memref3 - -
5442 S1: x = load - VS1_0
5445 step2: vectorize stmt S2 (done here):
5446 To vectorize stmt S2 we first need to find the relevant vector
5447 def for the first operand 'x'. This is, as usual, obtained from
5448 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5449 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5450 relevant vector def 'vx0'. Having found 'vx0' we can generate
5451 the vector stmt VS2_0, and as usual, record it in the
5452 STMT_VINFO_VEC_STMT of stmt S2.
5453 When creating the second copy (VS2_1), we obtain the relevant vector
5454 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5455 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5456 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5457 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5458 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5459 chain of stmts and pointers:
5460 RELATED_STMT VEC_STMT
5461 VS1_0: vx0 = memref0 VS1_1 -
5462 VS1_1: vx1 = memref1 VS1_2 -
5463 VS1_2: vx2 = memref2 VS1_3 -
5464 VS1_3: vx3 = memref3 - -
5465 S1: x = load - VS1_0
5466 VS2_0: vz0 = vx0 + v1 VS2_1 -
5467 VS2_1: vz1 = vx1 + v1 VS2_2 -
5468 VS2_2: vz2 = vx2 + v1 VS2_3 -
5469 VS2_3: vz3 = vx3 + v1 - -
5470 S2: z = x + 1 - VS2_0 */
5472 prev_stmt_info
= NULL
;
5473 for (j
= 0; j
< ncopies
; j
++)
5478 if (op_type
== binary_op
|| op_type
== ternary_op
)
5479 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5482 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5484 if (op_type
== ternary_op
)
5485 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5490 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5491 if (op_type
== ternary_op
)
5493 tree vec_oprnd
= vec_oprnds2
.pop ();
5494 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5499 /* Arguments are ready. Create the new vector stmt. */
5500 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5502 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5503 ? vec_oprnds1
[i
] : NULL_TREE
);
5504 vop2
= ((op_type
== ternary_op
)
5505 ? vec_oprnds2
[i
] : NULL_TREE
);
5506 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5507 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5508 gimple_assign_set_lhs (new_stmt
, new_temp
);
5509 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5511 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5518 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5520 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5521 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5524 vec_oprnds0
.release ();
5525 vec_oprnds1
.release ();
5526 vec_oprnds2
.release ();
5531 /* A helper function to ensure data reference DR's base alignment
5535 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5540 if (DR_VECT_AUX (dr
)->base_misaligned
)
5542 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5543 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5545 if (decl_in_symtab_p (base_decl
))
5546 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5549 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5550 DECL_USER_ALIGN (base_decl
) = 1;
5552 DR_VECT_AUX (dr
)->base_misaligned
= false;
5557 /* Function get_group_alias_ptr_type.
5559 Return the alias type for the group starting at FIRST_STMT. */
5562 get_group_alias_ptr_type (gimple
*first_stmt
)
5564 struct data_reference
*first_dr
, *next_dr
;
5567 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5568 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5571 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5572 if (get_alias_set (DR_REF (first_dr
))
5573 != get_alias_set (DR_REF (next_dr
)))
5575 if (dump_enabled_p ())
5576 dump_printf_loc (MSG_NOTE
, vect_location
,
5577 "conflicting alias set types.\n");
5578 return ptr_type_node
;
5580 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5582 return reference_alias_ptr_type (DR_REF (first_dr
));
5586 /* Function vectorizable_store.
5588 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5591 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5595 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5601 tree vec_oprnd
= NULL_TREE
;
5602 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5603 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5605 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5606 struct loop
*loop
= NULL
;
5607 machine_mode vec_mode
;
5609 enum dr_alignment_support alignment_support_scheme
;
5611 enum vect_def_type dt
;
5612 stmt_vec_info prev_stmt_info
= NULL
;
5613 tree dataref_ptr
= NULL_TREE
;
5614 tree dataref_offset
= NULL_TREE
;
5615 gimple
*ptr_incr
= NULL
;
5618 gimple
*next_stmt
, *first_stmt
;
5620 unsigned int group_size
, i
;
5621 vec
<tree
> oprnds
= vNULL
;
5622 vec
<tree
> result_chain
= vNULL
;
5624 tree offset
= NULL_TREE
;
5625 vec
<tree
> vec_oprnds
= vNULL
;
5626 bool slp
= (slp_node
!= NULL
);
5627 unsigned int vec_num
;
5628 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5629 vec_info
*vinfo
= stmt_info
->vinfo
;
5631 gather_scatter_info gs_info
;
5632 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5635 vec_load_store_type vls_type
;
5638 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5641 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5645 /* Is vectorizable store? */
5647 if (!is_gimple_assign (stmt
))
5650 scalar_dest
= gimple_assign_lhs (stmt
);
5651 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5652 && is_pattern_stmt_p (stmt_info
))
5653 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5654 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5655 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5656 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5657 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5658 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5659 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5660 && TREE_CODE (scalar_dest
) != MEM_REF
)
5663 /* Cannot have hybrid store SLP -- that would mean storing to the
5664 same location twice. */
5665 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5667 gcc_assert (gimple_assign_single_p (stmt
));
5669 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5670 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5674 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5675 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5680 /* Multiple types in SLP are handled by creating the appropriate number of
5681 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5686 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5688 gcc_assert (ncopies
>= 1);
5690 /* FORNOW. This restriction should be relaxed. */
5691 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5693 if (dump_enabled_p ())
5694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5695 "multiple types in nested loop.\n");
5699 op
= gimple_assign_rhs1 (stmt
);
5701 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5705 "use not simple.\n");
5709 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5710 vls_type
= VLS_STORE_INVARIANT
;
5712 vls_type
= VLS_STORE
;
5714 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5717 elem_type
= TREE_TYPE (vectype
);
5718 vec_mode
= TYPE_MODE (vectype
);
5720 /* FORNOW. In some cases can vectorize even if data-type not supported
5721 (e.g. - array initialization with 0). */
5722 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5725 if (!STMT_VINFO_DATA_REF (stmt_info
))
5728 vect_memory_access_type memory_access_type
;
5729 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5730 &memory_access_type
, &gs_info
))
5733 if (!vec_stmt
) /* transformation not required. */
5735 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5736 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5737 /* The SLP costs are calculated during SLP analysis. */
5738 if (!PURE_SLP_STMT (stmt_info
))
5739 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5743 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5747 ensure_base_align (stmt_info
, dr
);
5749 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5751 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5752 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5753 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5754 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5755 edge pe
= loop_preheader_edge (loop
);
5758 enum { NARROW
, NONE
, WIDEN
} modifier
;
5759 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5761 if (nunits
== (unsigned int) scatter_off_nunits
)
5763 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5765 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5768 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5769 sel
[i
] = i
| nunits
;
5771 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5772 gcc_assert (perm_mask
!= NULL_TREE
);
5774 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5776 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5779 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5780 sel
[i
] = i
| scatter_off_nunits
;
5782 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5783 gcc_assert (perm_mask
!= NULL_TREE
);
5789 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5790 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5791 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5792 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5793 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5794 scaletype
= TREE_VALUE (arglist
);
5796 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5797 && TREE_CODE (rettype
) == VOID_TYPE
);
5799 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5800 if (!is_gimple_min_invariant (ptr
))
5802 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5803 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5804 gcc_assert (!new_bb
);
5807 /* Currently we support only unconditional scatter stores,
5808 so mask should be all ones. */
5809 mask
= build_int_cst (masktype
, -1);
5810 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5812 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5814 prev_stmt_info
= NULL
;
5815 for (j
= 0; j
< ncopies
; ++j
)
5820 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5822 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5824 else if (modifier
!= NONE
&& (j
& 1))
5826 if (modifier
== WIDEN
)
5829 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5830 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5833 else if (modifier
== NARROW
)
5835 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5838 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5847 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5849 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5853 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5855 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5856 == TYPE_VECTOR_SUBPARTS (srctype
));
5857 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5858 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5859 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5860 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5864 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5866 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5867 == TYPE_VECTOR_SUBPARTS (idxtype
));
5868 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5869 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5870 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5871 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5876 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5878 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5880 if (prev_stmt_info
== NULL
)
5881 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5883 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5884 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5889 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5892 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5893 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5894 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5896 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5899 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5901 /* We vectorize all the stmts of the interleaving group when we
5902 reach the last stmt in the group. */
5903 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5904 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5913 grouped_store
= false;
5914 /* VEC_NUM is the number of vect stmts to be created for this
5916 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5917 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5918 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5919 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5920 op
= gimple_assign_rhs1 (first_stmt
);
5923 /* VEC_NUM is the number of vect stmts to be created for this
5925 vec_num
= group_size
;
5927 ref_type
= get_group_alias_ptr_type (first_stmt
);
5933 group_size
= vec_num
= 1;
5934 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5937 if (dump_enabled_p ())
5938 dump_printf_loc (MSG_NOTE
, vect_location
,
5939 "transform store. ncopies = %d\n", ncopies
);
5941 if (memory_access_type
== VMAT_ELEMENTWISE
5942 || memory_access_type
== VMAT_STRIDED_SLP
)
5944 gimple_stmt_iterator incr_gsi
;
5950 gimple_seq stmts
= NULL
;
5951 tree stride_base
, stride_step
, alias_off
;
5955 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5958 = fold_build_pointer_plus
5959 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5960 size_binop (PLUS_EXPR
,
5961 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5962 convert_to_ptrofftype (DR_INIT (first_dr
))));
5963 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5965 /* For a store with loop-invariant (but other than power-of-2)
5966 stride (i.e. not a grouped access) like so:
5968 for (i = 0; i < n; i += stride)
5971 we generate a new induction variable and new stores from
5972 the components of the (vectorized) rhs:
5974 for (j = 0; ; j += VF*stride)
5979 array[j + stride] = tmp2;
5983 unsigned nstores
= nunits
;
5985 tree ltype
= elem_type
;
5988 if (group_size
< nunits
5989 && nunits
% group_size
== 0)
5991 nstores
= nunits
/ group_size
;
5993 ltype
= build_vector_type (elem_type
, group_size
);
5995 else if (group_size
>= nunits
5996 && group_size
% nunits
== 0)
6002 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6003 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6006 ivstep
= stride_step
;
6007 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6008 build_int_cst (TREE_TYPE (ivstep
), vf
));
6010 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6012 create_iv (stride_base
, ivstep
, NULL
,
6013 loop
, &incr_gsi
, insert_after
,
6015 incr
= gsi_stmt (incr_gsi
);
6016 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6018 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6020 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6022 prev_stmt_info
= NULL
;
6023 alias_off
= build_int_cst (ref_type
, 0);
6024 next_stmt
= first_stmt
;
6025 for (g
= 0; g
< group_size
; g
++)
6027 running_off
= offvar
;
6030 tree size
= TYPE_SIZE_UNIT (ltype
);
6031 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6033 tree newoff
= copy_ssa_name (running_off
, NULL
);
6034 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6036 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6037 running_off
= newoff
;
6039 unsigned int group_el
= 0;
6040 unsigned HOST_WIDE_INT
6041 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6042 for (j
= 0; j
< ncopies
; j
++)
6044 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6045 and first_stmt == stmt. */
6050 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6052 vec_oprnd
= vec_oprnds
[0];
6056 gcc_assert (gimple_assign_single_p (next_stmt
));
6057 op
= gimple_assign_rhs1 (next_stmt
);
6058 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6064 vec_oprnd
= vec_oprnds
[j
];
6067 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6068 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6072 for (i
= 0; i
< nstores
; i
++)
6074 tree newref
, newoff
;
6075 gimple
*incr
, *assign
;
6076 tree size
= TYPE_SIZE (ltype
);
6077 /* Extract the i'th component. */
6078 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6079 bitsize_int (i
), size
);
6080 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6083 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6087 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6089 newref
= build2 (MEM_REF
, ltype
,
6090 running_off
, this_off
);
6092 /* And store it to *running_off. */
6093 assign
= gimple_build_assign (newref
, elem
);
6094 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6098 || group_el
== group_size
)
6100 newoff
= copy_ssa_name (running_off
, NULL
);
6101 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6102 running_off
, stride_step
);
6103 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6105 running_off
= newoff
;
6108 if (g
== group_size
- 1
6111 if (j
== 0 && i
== 0)
6112 STMT_VINFO_VEC_STMT (stmt_info
)
6113 = *vec_stmt
= assign
;
6115 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6116 prev_stmt_info
= vinfo_for_stmt (assign
);
6120 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6127 auto_vec
<tree
> dr_chain (group_size
);
6128 oprnds
.create (group_size
);
6130 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6131 gcc_assert (alignment_support_scheme
);
6132 /* Targets with store-lane instructions must not require explicit
6134 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6135 || alignment_support_scheme
== dr_aligned
6136 || alignment_support_scheme
== dr_unaligned_supported
);
6138 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6139 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6140 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6142 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6143 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6145 aggr_type
= vectype
;
6147 /* In case the vectorization factor (VF) is bigger than the number
6148 of elements that we can fit in a vectype (nunits), we have to generate
6149 more than one vector stmt - i.e - we need to "unroll" the
6150 vector stmt by a factor VF/nunits. For more details see documentation in
6151 vect_get_vec_def_for_stmt_copy. */
6153 /* In case of interleaving (non-unit grouped access):
6160 We create vectorized stores starting from base address (the access of the
6161 first stmt in the chain (S2 in the above example), when the last store stmt
6162 of the chain (S4) is reached:
6165 VS2: &base + vec_size*1 = vx0
6166 VS3: &base + vec_size*2 = vx1
6167 VS4: &base + vec_size*3 = vx3
6169 Then permutation statements are generated:
6171 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6172 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6175 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6176 (the order of the data-refs in the output of vect_permute_store_chain
6177 corresponds to the order of scalar stmts in the interleaving chain - see
6178 the documentation of vect_permute_store_chain()).
6180 In case of both multiple types and interleaving, above vector stores and
6181 permutation stmts are created for every copy. The result vector stmts are
6182 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6183 STMT_VINFO_RELATED_STMT for the next copies.
6186 prev_stmt_info
= NULL
;
6187 for (j
= 0; j
< ncopies
; j
++)
6194 /* Get vectorized arguments for SLP_NODE. */
6195 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6196 NULL
, slp_node
, -1);
6198 vec_oprnd
= vec_oprnds
[0];
6202 /* For interleaved stores we collect vectorized defs for all the
6203 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6204 used as an input to vect_permute_store_chain(), and OPRNDS as
6205 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6207 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6208 OPRNDS are of size 1. */
6209 next_stmt
= first_stmt
;
6210 for (i
= 0; i
< group_size
; i
++)
6212 /* Since gaps are not supported for interleaved stores,
6213 GROUP_SIZE is the exact number of stmts in the chain.
6214 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6215 there is no interleaving, GROUP_SIZE is 1, and only one
6216 iteration of the loop will be executed. */
6217 gcc_assert (next_stmt
6218 && gimple_assign_single_p (next_stmt
));
6219 op
= gimple_assign_rhs1 (next_stmt
);
6221 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6222 dr_chain
.quick_push (vec_oprnd
);
6223 oprnds
.quick_push (vec_oprnd
);
6224 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6228 /* We should have caught mismatched types earlier. */
6229 gcc_assert (useless_type_conversion_p (vectype
,
6230 TREE_TYPE (vec_oprnd
)));
6231 bool simd_lane_access_p
6232 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6233 if (simd_lane_access_p
6234 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6235 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6236 && integer_zerop (DR_OFFSET (first_dr
))
6237 && integer_zerop (DR_INIT (first_dr
))
6238 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6239 get_alias_set (TREE_TYPE (ref_type
))))
6241 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6242 dataref_offset
= build_int_cst (ref_type
, 0);
6247 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6248 simd_lane_access_p
? loop
: NULL
,
6249 offset
, &dummy
, gsi
, &ptr_incr
,
6250 simd_lane_access_p
, &inv_p
);
6251 gcc_assert (bb_vinfo
|| !inv_p
);
6255 /* For interleaved stores we created vectorized defs for all the
6256 defs stored in OPRNDS in the previous iteration (previous copy).
6257 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6258 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6260 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6261 OPRNDS are of size 1. */
6262 for (i
= 0; i
< group_size
; i
++)
6265 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6266 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6267 dr_chain
[i
] = vec_oprnd
;
6268 oprnds
[i
] = vec_oprnd
;
6272 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6273 TYPE_SIZE_UNIT (aggr_type
));
6275 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6276 TYPE_SIZE_UNIT (aggr_type
));
6279 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6283 /* Combine all the vectors into an array. */
6284 vec_array
= create_vector_array (vectype
, vec_num
);
6285 for (i
= 0; i
< vec_num
; i
++)
6287 vec_oprnd
= dr_chain
[i
];
6288 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6292 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6293 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6294 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
6295 gimple_call_set_lhs (new_stmt
, data_ref
);
6296 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6304 result_chain
.create (group_size
);
6306 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6310 next_stmt
= first_stmt
;
6311 for (i
= 0; i
< vec_num
; i
++)
6313 unsigned align
, misalign
;
6316 /* Bump the vector pointer. */
6317 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6321 vec_oprnd
= vec_oprnds
[i
];
6322 else if (grouped_store
)
6323 /* For grouped stores vectorized defs are interleaved in
6324 vect_permute_store_chain(). */
6325 vec_oprnd
= result_chain
[i
];
6327 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
6331 : build_int_cst (ref_type
, 0));
6332 align
= TYPE_ALIGN_UNIT (vectype
);
6333 if (aligned_access_p (first_dr
))
6335 else if (DR_MISALIGNMENT (first_dr
) == -1)
6337 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6338 align
= TYPE_ALIGN_UNIT (elem_type
);
6340 align
= get_object_alignment (DR_REF (first_dr
))
6343 TREE_TYPE (data_ref
)
6344 = build_aligned_type (TREE_TYPE (data_ref
),
6345 align
* BITS_PER_UNIT
);
6349 TREE_TYPE (data_ref
)
6350 = build_aligned_type (TREE_TYPE (data_ref
),
6351 TYPE_ALIGN (elem_type
));
6352 misalign
= DR_MISALIGNMENT (first_dr
);
6354 if (dataref_offset
== NULL_TREE
6355 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6356 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6359 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6361 tree perm_mask
= perm_mask_for_reverse (vectype
);
6363 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6365 tree new_temp
= make_ssa_name (perm_dest
);
6367 /* Generate the permute statement. */
6369 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6370 vec_oprnd
, perm_mask
);
6371 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6373 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6374 vec_oprnd
= new_temp
;
6377 /* Arguments are ready. Create the new vector stmt. */
6378 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6379 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6384 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6392 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6394 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6395 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6400 result_chain
.release ();
6401 vec_oprnds
.release ();
6406 /* Given a vector type VECTYPE, turn permutation SEL into the equivalent
   VECTOR_CST mask.  SEL is read at indices 0 .. TYPE_VECTOR_SUBPARTS
   (VECTYPE) - 1 and every entry becomes one integer constant element of
   the mask.

   No checks are made that the target platform supports the mask, so
   callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.

   NOTE(review): in this copy of the file the return type, the braces,
   the declaration of I and NUNITS and the final return statement are not
   visible; presumably MASK_VEC is what is returned -- confirm against
   the complete source.  */
6412 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6414 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
/* The mask gets one element per element of VECTYPE.  */
6417 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* The mask element type is obtained through the language hook from the
   integer mode that corresponds to the mode of VECTYPE's element type.
   The literal 1 is presumably the "unsigned" flag of type_for_mode --
   confirm.  */
6419 mask_elt_type
= lang_hooks
.types
.type_for_mode
6420 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
/* Vector type built from that element type; this is the type of the
   resulting mask.  */
6421 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Scratch array for the NUNITS element constants (XALLOCAVEC; presumably
   stack allocated, so nothing is freed here -- confirm).  */
6423 mask_elts
= XALLOCAVEC (tree
, nunits
);
/* Convert every selector entry into a constant of the mask element type.
   The loop runs from the last element down to element 0.  */
6424 for (i
= nunits
- 1; i
>= 0; i
--)
6425 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
/* Assemble the element constants into the vector mask.  */
6426 mask_vec
= build_vector (mask_type
, mask_elts
);
6431 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_,
   and then returns whatever vect_gen_perm_mask_any returns for the same
   VECTYPE and SEL.

   NOTE(review): the return type line and the braces are not visible in
   this copy of the file.  */
6435 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
/* The permutation is tested for VECTYPE's mode.  The literal `false' is
   presumably the "variable selector" flag, i.e. SEL is a compile-time
   constant -- confirm against can_vec_perm_p.  */
6437 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
/* The mask construction itself is shared with the unchecked variant.  */
6438 return vect_gen_perm_mask_any (vectype
, sel
);
6441 /* Given vector variables X and Y that were generated for the scalar
   STMT, generate a statement that permutes the vector elements of X and Y
   using permutation mask MASK_VEC, insert it at *GSI and return the
   permuted vector variable.

   The result has the type of X; the type of Y is not looked at here.

   NOTE(review): the return type, the braces, the declaration of
   PERM_STMT and the return statement are not visible in this copy of the
   file; presumably DATA_REF (the new SSA name) is what is returned --
   confirm against the complete source.  */
6447 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6448 gimple_stmt_iterator
*gsi
)
/* The permuted value is given X's vector type.  */
6450 tree vectype
= TREE_TYPE (x
);
6451 tree perm_dest
, data_ref
;
/* The destination variable is derived from the lhs of the scalar STMT,
   and a fresh SSA name is created from it to hold the permuted vector.  */
6454 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6455 data_ref
= make_ssa_name (perm_dest
);
6457 /* Generate the permute statement. */
6458 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
/* Hand the new statement to the common vectorizer routine, which is
   given the scalar STMT and the iterator GSI.  */
6459 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6464 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.

   The work is done in two walks over the SSA use operands of STMT: the
   first one only inspects the defining statements that live inside LOOP,
   the second one moves them.

   NOTE(review): in this copy of the file the return type, the braces,
   the declarations of OP, I, OP2 and I2, and the statements guarded by
   the GIMPLE_PHI test and by the inner operand test are not visible;
   going by the contract above they presumably return false -- confirm
   against the complete source.  */
6470 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
/* First walk: inspection only, as far as is visible here.  */
6476 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6478 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only uses whose definition is a real statement located inside LOOP
   are of interest.  */
6479 if (!gimple_nop_p (def_stmt
)
6480 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6482 /* Make sure we don't need to recurse.  While we could do
   so in simple cases when there are more complex use webs
   we don't have an easy way to preserve stmt order to fulfil
   dependencies within them.  */
/* A definition that is a PHI node is singled out.  */
6488 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Look one level deeper: the operands of the in-loop definition are
   tested for being defined inside LOOP themselves.  */
6490 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6492 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6493 if (!gimple_nop_p (def_stmt2
)
6494 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Second walk: same selection of in-loop definitions as above, but now
   each one is actually moved.  */
6504 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6506 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6507 if (!gimple_nop_p (def_stmt
)
6508 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* Detach the definition from its current position and re-insert the very
   same statement on LOOP's preheader edge.  The `false' passed to
   gsi_remove presumably means "do not remove permanently", since the
   statement is reused right away -- confirm.  */
6510 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6511 gsi_remove (&gsi
, false);
6512 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6519 /* vectorizable_load.
6521 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6523 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6524 stmt to replace it, put it in VEC_STMT, and insert it at *GSI.
6525 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6528 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6529 slp_tree slp_node
, slp_instance slp_node_instance
)
6532 tree vec_dest
= NULL
;
6533 tree data_ref
= NULL
;
6534 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6535 stmt_vec_info prev_stmt_info
;
6536 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6537 struct loop
*loop
= NULL
;
6538 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6539 bool nested_in_vect_loop
= false;
6540 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6544 gimple
*new_stmt
= NULL
;
6546 enum dr_alignment_support alignment_support_scheme
;
6547 tree dataref_ptr
= NULL_TREE
;
6548 tree dataref_offset
= NULL_TREE
;
6549 gimple
*ptr_incr
= NULL
;
6551 int i
, j
, group_size
, group_gap_adj
;
6552 tree msq
= NULL_TREE
, lsq
;
6553 tree offset
= NULL_TREE
;
6554 tree byte_offset
= NULL_TREE
;
6555 tree realignment_token
= NULL_TREE
;
6557 vec
<tree
> dr_chain
= vNULL
;
6558 bool grouped_load
= false;
6560 gimple
*first_stmt_for_drptr
= NULL
;
6562 bool compute_in_loop
= false;
6563 struct loop
*at_loop
;
6565 bool slp
= (slp_node
!= NULL
);
6566 bool slp_perm
= false;
6567 enum tree_code code
;
6568 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6571 gather_scatter_info gs_info
;
6572 vec_info
*vinfo
= stmt_info
->vinfo
;
6575 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6578 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6582 /* Is vectorizable load? */
6583 if (!is_gimple_assign (stmt
))
6586 scalar_dest
= gimple_assign_lhs (stmt
);
6587 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6590 code
= gimple_assign_rhs_code (stmt
);
6591 if (code
!= ARRAY_REF
6592 && code
!= BIT_FIELD_REF
6593 && code
!= INDIRECT_REF
6594 && code
!= COMPONENT_REF
6595 && code
!= IMAGPART_EXPR
6596 && code
!= REALPART_EXPR
6598 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6601 if (!STMT_VINFO_DATA_REF (stmt_info
))
6604 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6605 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6609 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6610 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6611 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6616 /* Multiple types in SLP are handled by creating the appropriate number of
6617 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6622 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6624 gcc_assert (ncopies
>= 1);
6626 /* FORNOW. This restriction should be relaxed. */
6627 if (nested_in_vect_loop
&& ncopies
> 1)
6629 if (dump_enabled_p ())
6630 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6631 "multiple types in nested loop.\n");
6635 /* Invalidate assumptions made by dependence analysis when vectorization
6636 on the unrolled body effectively re-orders stmts. */
6638 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6639 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6640 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6642 if (dump_enabled_p ())
6643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6644 "cannot perform implicit CSE when unrolling "
6645 "with negative dependence distance\n");
6649 elem_type
= TREE_TYPE (vectype
);
6650 mode
= TYPE_MODE (vectype
);
6652 /* FORNOW. In some cases can vectorize even if data-type not supported
6653 (e.g. - data copies). */
6654 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6656 if (dump_enabled_p ())
6657 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6658 "Aligned load, but unsupported type.\n");
6662 /* Check if the load is a part of an interleaving chain. */
6663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6665 grouped_load
= true;
6667 gcc_assert (!nested_in_vect_loop
);
6668 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6670 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6671 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6673 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6676 /* Invalidate assumptions made by dependence analysis when vectorization
6677 on the unrolled body effectively re-orders stmts. */
6678 if (!PURE_SLP_STMT (stmt_info
)
6679 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6680 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6681 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6683 if (dump_enabled_p ())
6684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6685 "cannot perform implicit CSE when performing "
6686 "group loads with negative dependence distance\n");
6690 /* Similarly when the stmt is a load that is both part of a SLP
6691 instance and a loop vectorized stmt via the same-dr mechanism
6692 we have to give up. */
6693 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6694 && (STMT_SLP_TYPE (stmt_info
)
6695 != STMT_SLP_TYPE (vinfo_for_stmt
6696 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6698 if (dump_enabled_p ())
6699 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6700 "conflicting SLP types for CSEd load\n");
6705 vect_memory_access_type memory_access_type
;
6706 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6707 &memory_access_type
, &gs_info
))
6710 if (!vec_stmt
) /* transformation not required. */
6713 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6714 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6715 /* The SLP costs are calculated during SLP analysis. */
6716 if (!PURE_SLP_STMT (stmt_info
))
6717 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6723 gcc_assert (memory_access_type
6724 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6726 if (dump_enabled_p ())
6727 dump_printf_loc (MSG_NOTE
, vect_location
,
6728 "transform load. ncopies = %d\n", ncopies
);
6732 ensure_base_align (stmt_info
, dr
);
6734 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6736 tree vec_oprnd0
= NULL_TREE
, op
;
6737 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6738 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6739 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6740 edge pe
= loop_preheader_edge (loop
);
6743 enum { NARROW
, NONE
, WIDEN
} modifier
;
6744 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6746 if (nunits
== gather_off_nunits
)
6748 else if (nunits
== gather_off_nunits
/ 2)
6750 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6753 for (i
= 0; i
< gather_off_nunits
; ++i
)
6754 sel
[i
] = i
| nunits
;
6756 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6758 else if (nunits
== gather_off_nunits
* 2)
6760 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6763 for (i
= 0; i
< nunits
; ++i
)
6764 sel
[i
] = i
< gather_off_nunits
6765 ? i
: i
+ nunits
- gather_off_nunits
;
6767 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6773 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6774 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6775 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6776 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6777 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6778 scaletype
= TREE_VALUE (arglist
);
6779 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6781 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6783 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6784 if (!is_gimple_min_invariant (ptr
))
6786 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6787 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6788 gcc_assert (!new_bb
);
6791 /* Currently we support only unconditional gather loads,
6792 so mask should be all ones. */
6793 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6794 mask
= build_int_cst (masktype
, -1);
6795 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6797 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6798 mask
= build_vector_from_val (masktype
, mask
);
6799 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6801 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6805 for (j
= 0; j
< 6; ++j
)
6807 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6808 mask
= build_real (TREE_TYPE (masktype
), r
);
6809 mask
= build_vector_from_val (masktype
, mask
);
6810 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6815 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6817 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6818 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6819 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6823 for (j
= 0; j
< 6; ++j
)
6825 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6826 merge
= build_real (TREE_TYPE (rettype
), r
);
6830 merge
= build_vector_from_val (rettype
, merge
);
6831 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6833 prev_stmt_info
= NULL
;
6834 for (j
= 0; j
< ncopies
; ++j
)
6836 if (modifier
== WIDEN
&& (j
& 1))
6837 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6838 perm_mask
, stmt
, gsi
);
6841 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6844 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6846 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6848 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6849 == TYPE_VECTOR_SUBPARTS (idxtype
));
6850 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6851 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6853 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6854 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6859 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6861 if (!useless_type_conversion_p (vectype
, rettype
))
6863 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6864 == TYPE_VECTOR_SUBPARTS (rettype
));
6865 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6866 gimple_call_set_lhs (new_stmt
, op
);
6867 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6868 var
= make_ssa_name (vec_dest
);
6869 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6871 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6875 var
= make_ssa_name (vec_dest
, new_stmt
);
6876 gimple_call_set_lhs (new_stmt
, var
);
6879 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6881 if (modifier
== NARROW
)
6888 var
= permute_vec_elements (prev_res
, var
,
6889 perm_mask
, stmt
, gsi
);
6890 new_stmt
= SSA_NAME_DEF_STMT (var
);
6893 if (prev_stmt_info
== NULL
)
6894 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6896 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6897 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6902 if (memory_access_type
== VMAT_ELEMENTWISE
6903 || memory_access_type
== VMAT_STRIDED_SLP
)
6905 gimple_stmt_iterator incr_gsi
;
6911 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6912 gimple_seq stmts
= NULL
;
6913 tree stride_base
, stride_step
, alias_off
;
6915 gcc_assert (!nested_in_vect_loop
);
6917 if (slp
&& grouped_load
)
6919 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6920 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6921 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6922 ref_type
= get_group_alias_ptr_type (first_stmt
);
6929 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6933 = fold_build_pointer_plus
6934 (DR_BASE_ADDRESS (first_dr
),
6935 size_binop (PLUS_EXPR
,
6936 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6937 convert_to_ptrofftype (DR_INIT (first_dr
))));
6938 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6940 /* For a load with loop-invariant (but other than power-of-2)
6941 stride (i.e. not a grouped access) like so:
6943 for (i = 0; i < n; i += stride)
6946 we generate a new induction variable and new accesses to
6947 form a new vector (or vectors, depending on ncopies):
6949 for (j = 0; ; j += VF*stride)
6951 tmp2 = array[j + stride];
6953 vectemp = {tmp1, tmp2, ...}
6956 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6957 build_int_cst (TREE_TYPE (stride_step
), vf
));
6959 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6961 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6962 loop
, &incr_gsi
, insert_after
,
6964 incr
= gsi_stmt (incr_gsi
);
6965 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6967 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6968 &stmts
, true, NULL_TREE
);
6970 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6972 prev_stmt_info
= NULL
;
6973 running_off
= offvar
;
6974 alias_off
= build_int_cst (ref_type
, 0);
6975 int nloads
= nunits
;
6977 tree ltype
= TREE_TYPE (vectype
);
6978 tree lvectype
= vectype
;
6979 auto_vec
<tree
> dr_chain
;
6980 if (memory_access_type
== VMAT_STRIDED_SLP
)
6982 if (group_size
< nunits
)
6984 /* Avoid emitting a constructor of vector elements by performing
6985 the loads using an integer type of the same size,
6986 constructing a vector of those and then re-interpreting it
6987 as the original vector type. This works around the fact
6988 that the vec_init optab was only designed for scalar
6989 element modes and thus expansion goes through memory.
6990 This avoids a huge runtime penalty due to the general
6991 inability to perform store forwarding from smaller stores
6992 to a larger load. */
6994 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
6995 enum machine_mode elmode
= mode_for_size (lsize
, MODE_INT
, 0);
6996 enum machine_mode vmode
= mode_for_vector (elmode
,
6997 nunits
/ group_size
);
6998 /* If we can't construct such a vector fall back to
6999 element loads of the original vector type. */
7000 if (VECTOR_MODE_P (vmode
)
7001 && optab_handler (vec_init_optab
, vmode
) != CODE_FOR_nothing
)
7003 nloads
= nunits
/ group_size
;
7005 ltype
= build_nonstandard_integer_type (lsize
, 1);
7006 lvectype
= build_vector_type (ltype
, nloads
);
7015 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7019 /* For SLP permutation support we need to load the whole group,
   not only the number of vector stmts the permutation result
   fits in.  */
7024 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7025 dr_chain
.create (ncopies
);
7028 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7031 unsigned HOST_WIDE_INT
7032 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7033 for (j
= 0; j
< ncopies
; j
++)
7036 vec_alloc (v
, nloads
);
7037 for (i
= 0; i
< nloads
; i
++)
7039 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7041 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7042 build2 (MEM_REF
, ltype
,
7043 running_off
, this_off
));
7044 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7046 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7047 gimple_assign_lhs (new_stmt
));
7051 || group_el
== group_size
)
7053 tree newoff
= copy_ssa_name (running_off
);
7054 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7055 running_off
, stride_step
);
7056 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7058 running_off
= newoff
;
7064 tree vec_inv
= build_constructor (lvectype
, v
);
7065 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7066 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7067 if (lvectype
!= vectype
)
7069 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7071 build1 (VIEW_CONVERT_EXPR
,
7072 vectype
, new_temp
));
7073 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7080 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7082 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7087 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7089 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7090 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7096 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7097 slp_node_instance
, false, &n_perms
);
7104 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7105 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7106 /* For SLP vectorization we directly vectorize a subchain
7107 without permutation. */
7108 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7109 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7110 /* For BB vectorization always use the first stmt to base
7111 the data ref pointer on. */
7113 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7115 /* Check if the chain of loads is already vectorized. */
7116 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7117 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
   ???  But we can only do so if there is exactly one
   as we have no way to get at the rest.  Leave the CSE
   opportunity alone.
   ???  With the group load eventually participating
   in multiple different permutations (having multiple
   slp nodes which refer to the same group) the CSE
   is even wrong code.  See PR56270.  */
7127 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7130 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7133 /* VEC_NUM is the number of vect stmts to be created for this group. */
7136 grouped_load
= false;
7137 /* For SLP permutation support we need to load the whole group,
   not only the number of vector stmts the permutation result
   fits in.  */
7141 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7143 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7144 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7147 vec_num
= group_size
;
7149 ref_type
= get_group_alias_ptr_type (first_stmt
);
7155 group_size
= vec_num
= 1;
7157 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7160 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7161 gcc_assert (alignment_support_scheme
);
7162 /* Targets with load-lane instructions must not require explicit
   realignment.  */
7164 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7165 || alignment_support_scheme
== dr_aligned
7166 || alignment_support_scheme
== dr_unaligned_supported
);
7168 /* In case the vectorization factor (VF) is bigger than the number
7169 of elements that we can fit in a vectype (nunits), we have to generate
7170 more than one vector stmt - i.e - we need to "unroll" the
7171 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7172 from one copy of the vector stmt to the next, in the field
7173 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7174 stages to find the correct vector defs to be used when vectorizing
7175 stmts that use the defs of the current stmt. The example below
7176 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7177 need to create 4 vectorized stmts):
7179 before vectorization:
7180 RELATED_STMT VEC_STMT
7184 step 1: vectorize stmt S1:
7185 We first create the vector stmt VS1_0, and, as usual, record a
7186 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7187 Next, we create the vector stmt VS1_1, and record a pointer to
7188 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7189 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7191 RELATED_STMT VEC_STMT
7192 VS1_0: vx0 = memref0 VS1_1 -
7193 VS1_1: vx1 = memref1 VS1_2 -
7194 VS1_2: vx2 = memref2 VS1_3 -
7195 VS1_3: vx3 = memref3 - -
7196 S1: x = load - VS1_0
7199 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7200 information we recorded in RELATED_STMT field is used to vectorize
7203 /* In case of interleaving (non-unit grouped access):
7210 Vectorized loads are created in the order of memory accesses
7211 starting from the access of the first stmt of the chain:
7214 VS2: vx1 = &base + vec_size*1
7215 VS3: vx3 = &base + vec_size*2
7216 VS4: vx4 = &base + vec_size*3
7218 Then permutation statements are generated:
7220 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7221 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7224 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7225 (the order of the data-refs in the output of vect_permute_load_chain
7226 corresponds to the order of scalar stmts in the interleaving chain - see
7227 the documentation of vect_permute_load_chain()).
7228 The generation of permutation stmts and recording them in
7229 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7231 In case of both multiple types and interleaving, the vector loads and
7232 permutation stmts above are created for every copy. The result vector
7233 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7234 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7236 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7237 on a target that supports unaligned accesses (dr_unaligned_supported)
7238 we generate the following code:
7242 p = p + indx * vectype_size;
7247 Otherwise, the data reference is potentially unaligned on a target that
7248 does not support unaligned accesses (dr_explicit_realign_optimized) -
7249 then generate the following code, in which the data in each iteration is
7250 obtained by two vector loads, one from the previous iteration, and one
7251 from the current iteration:
7253 msq_init = *(floor(p1))
7254 p2 = initial_addr + VS - 1;
7255 realignment_token = call target_builtin;
7258 p2 = p2 + indx * vectype_size
7260 vec_dest = realign_load (msq, lsq, realignment_token)
7265 /* If the misalignment remains the same throughout the execution of the
7266 loop, we can create the init_addr and permutation mask at the loop
7267 preheader. Otherwise, it needs to be created inside the loop.
7268 This can only occur when vectorizing memory accesses in the inner-loop
7269 nested within an outer-loop that is being vectorized. */
7271 if (nested_in_vect_loop
7272 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7273 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7275 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7276 compute_in_loop
= true;
7279 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7280 || alignment_support_scheme
== dr_explicit_realign
)
7281 && !compute_in_loop
)
7283 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7284 alignment_support_scheme
, NULL_TREE
,
7286 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7288 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7289 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7296 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7297 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7299 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7300 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7302 aggr_type
= vectype
;
7304 prev_stmt_info
= NULL
;
7305 for (j
= 0; j
< ncopies
; j
++)
7307 /* 1. Create the vector or array pointer update chain. */
7310 bool simd_lane_access_p
7311 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7312 if (simd_lane_access_p
7313 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7314 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7315 && integer_zerop (DR_OFFSET (first_dr
))
7316 && integer_zerop (DR_INIT (first_dr
))
7317 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7318 get_alias_set (TREE_TYPE (ref_type
)))
7319 && (alignment_support_scheme
== dr_aligned
7320 || alignment_support_scheme
== dr_unaligned_supported
))
7322 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7323 dataref_offset
= build_int_cst (ref_type
, 0);
7326 else if (first_stmt_for_drptr
7327 && first_stmt
!= first_stmt_for_drptr
)
7330 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7331 at_loop
, offset
, &dummy
, gsi
,
7332 &ptr_incr
, simd_lane_access_p
,
7333 &inv_p
, byte_offset
);
7334 /* Adjust the pointer by the difference to first_stmt. */
7335 data_reference_p ptrdr
7336 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7337 tree diff
= fold_convert (sizetype
,
7338 size_binop (MINUS_EXPR
,
7341 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7346 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7347 offset
, &dummy
, gsi
, &ptr_incr
,
7348 simd_lane_access_p
, &inv_p
,
7351 else if (dataref_offset
)
7352 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7353 TYPE_SIZE_UNIT (aggr_type
));
7355 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7356 TYPE_SIZE_UNIT (aggr_type
));
7358 if (grouped_load
|| slp_perm
)
7359 dr_chain
.create (vec_num
);
7361 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7365 vec_array
= create_vector_array (vectype
, vec_num
);
/* Emit:
   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
7369 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7370 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7371 gimple_call_set_lhs (new_stmt
, vec_array
);
7372 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7374 /* Extract each vector into an SSA_NAME. */
7375 for (i
= 0; i
< vec_num
; i
++)
7377 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7379 dr_chain
.quick_push (new_temp
);
7382 /* Record the mapping between SSA_NAMEs and statements. */
7383 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7387 for (i
= 0; i
< vec_num
; i
++)
7390 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7393 /* 2. Create the vector-load in the loop. */
7394 switch (alignment_support_scheme
)
7397 case dr_unaligned_supported
:
7399 unsigned int align
, misalign
;
7402 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7405 : build_int_cst (ref_type
, 0));
7406 align
= TYPE_ALIGN_UNIT (vectype
);
7407 if (alignment_support_scheme
== dr_aligned
)
7409 gcc_assert (aligned_access_p (first_dr
));
7412 else if (DR_MISALIGNMENT (first_dr
) == -1)
7414 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7415 align
= TYPE_ALIGN_UNIT (elem_type
);
7417 align
= (get_object_alignment (DR_REF (first_dr
))
7420 TREE_TYPE (data_ref
)
7421 = build_aligned_type (TREE_TYPE (data_ref
),
7422 align
* BITS_PER_UNIT
);
7426 TREE_TYPE (data_ref
)
7427 = build_aligned_type (TREE_TYPE (data_ref
),
7428 TYPE_ALIGN (elem_type
));
7429 misalign
= DR_MISALIGNMENT (first_dr
);
7431 if (dataref_offset
== NULL_TREE
7432 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7433 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7437 case dr_explicit_realign
:
7441 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7443 if (compute_in_loop
)
7444 msq
= vect_setup_realignment (first_stmt
, gsi
,
7446 dr_explicit_realign
,
7449 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7450 ptr
= copy_ssa_name (dataref_ptr
);
7452 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7453 new_stmt
= gimple_build_assign
7454 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7456 (TREE_TYPE (dataref_ptr
),
7457 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7458 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7460 = build2 (MEM_REF
, vectype
, ptr
,
7461 build_int_cst (ref_type
, 0));
7462 vec_dest
= vect_create_destination_var (scalar_dest
,
7464 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7465 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7466 gimple_assign_set_lhs (new_stmt
, new_temp
);
7467 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7468 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7469 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7472 bump
= size_binop (MULT_EXPR
, vs
,
7473 TYPE_SIZE_UNIT (elem_type
));
7474 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7475 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7476 new_stmt
= gimple_build_assign
7477 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7480 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7481 ptr
= copy_ssa_name (ptr
, new_stmt
);
7482 gimple_assign_set_lhs (new_stmt
, ptr
);
7483 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7485 = build2 (MEM_REF
, vectype
, ptr
,
7486 build_int_cst (ref_type
, 0));
7489 case dr_explicit_realign_optimized
:
7490 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7491 new_temp
= copy_ssa_name (dataref_ptr
);
7493 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7494 new_stmt
= gimple_build_assign
7495 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7497 (TREE_TYPE (dataref_ptr
),
7498 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7499 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7501 = build2 (MEM_REF
, vectype
, new_temp
,
7502 build_int_cst (ref_type
, 0));
7507 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7508 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7509 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7510 gimple_assign_set_lhs (new_stmt
, new_temp
);
7511 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7513 /* 3. Handle explicit realignment if necessary/supported.
   Create in loop:
   vec_dest = realign_load (msq, lsq, realignment_token)  */
7516 if (alignment_support_scheme
== dr_explicit_realign_optimized
7517 || alignment_support_scheme
== dr_explicit_realign
)
7519 lsq
= gimple_assign_lhs (new_stmt
);
7520 if (!realignment_token
)
7521 realignment_token
= dataref_ptr
;
7522 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7523 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7524 msq
, lsq
, realignment_token
);
7525 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7526 gimple_assign_set_lhs (new_stmt
, new_temp
);
7527 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7529 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7532 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7533 add_phi_arg (phi
, lsq
,
7534 loop_latch_edge (containing_loop
),
7540 /* 4. Handle invariant-load. */
7541 if (inv_p
&& !bb_vinfo
)
7543 gcc_assert (!grouped_load
);
7544 /* If we have versioned for aliasing or the loop doesn't
7545 have any data dependencies that would preclude this,
7546 then we are sure this is a loop invariant load and
7547 thus we can insert it on the preheader edge. */
7548 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7549 && !nested_in_vect_loop
7550 && hoist_defs_of_uses (stmt
, loop
))
7552 if (dump_enabled_p ())
7554 dump_printf_loc (MSG_NOTE
, vect_location
,
7555 "hoisting out of the vectorized "
7557 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7559 tree tem
= copy_ssa_name (scalar_dest
);
7560 gsi_insert_on_edge_immediate
7561 (loop_preheader_edge (loop
),
7562 gimple_build_assign (tem
,
7564 (gimple_assign_rhs1 (stmt
))));
7565 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7566 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7567 set_vinfo_for_stmt (new_stmt
,
7568 new_stmt_vec_info (new_stmt
, vinfo
));
7572 gimple_stmt_iterator gsi2
= *gsi
;
7574 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7576 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7580 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7582 tree perm_mask
= perm_mask_for_reverse (vectype
);
7583 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7584 perm_mask
, stmt
, gsi
);
7585 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7588 /* Collect vector loads and later create their permutation in
7589 vect_transform_grouped_load (). */
7590 if (grouped_load
|| slp_perm
)
7591 dr_chain
.quick_push (new_temp
);
7593 /* Store vector loads in the corresponding SLP_NODE. */
7594 if (slp
&& !slp_perm
)
7595 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7597 /* Bump the vector pointer to account for a gap or for excess
7598 elements loaded for a permuted SLP load. */
7599 if (group_gap_adj
!= 0)
7603 = wide_int_to_tree (sizetype
,
7604 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7605 group_gap_adj
, &ovf
));
7606 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7611 if (slp
&& !slp_perm
)
7617 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7618 slp_node_instance
, false,
7621 dr_chain
.release ();
7629 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7630 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7631 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7636 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7638 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7639 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7642 dr_chain
.release ();
7648 /* Function vect_is_simple_cond.

   Input:
   COND - Condition that is checked for simple use.
   VINFO - the vec_info that is handed on to vect_is_simple_use for every
	   SSA_NAME operand that is checked.  (Older text called this
	   parameter LOOP; the signature below has no such parameter.)

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.

   NOTE(review): this listing is a lossy extract.  Each line still carries
   its original line number, and lines that held no identifier (the return
   type, braces, most `return' statements, some call continuation lines)
   are missing.  Restore them from the pristine file before compiling.  */
7661 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7664 enum vect_def_type dt
;
/* Vector types reported by vect_is_simple_use for the two comparison
   operands; each stays NULL_TREE unless that operand is an SSA_NAME.  */
7665 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* First form: COND is itself an SSA_NAME of scalar boolean type.  It is
   checked directly and *COMP_VECTYPE must then be a vector boolean type.
   NOTE(review): the remaining arguments of the vect_is_simple_use call, any
   further `||' clause, and the statements guarded by this test are not
   visible in the extract.  */
7668 if (TREE_CODE (cond
) == SSA_NAME
7669 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7671 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7672 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7675 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
/* Second form: COND is tested for being a comparison.  The statement this
   test guards is missing here (presumably a rejection -- confirm).  */
7680 if (!COMPARISON_CLASS_P (cond
))
/* Split the comparison into its two operands.  */
7683 lhs
= TREE_OPERAND (cond
, 0);
7684 rhs
= TREE_OPERAND (cond
, 1);
/* An SSA_NAME left operand goes through vect_is_simple_use, which is given
   &VECTYPE1; any other left operand is tested against INTEGER_CST,
   REAL_CST and FIXED_CST.  */
7686 if (TREE_CODE (lhs
) == SSA_NAME
)
7688 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7689 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7692 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7693 && TREE_CODE (lhs
) != FIXED_CST
)
/* Same treatment for the right operand, filling VECTYPE2.  */
7696 if (TREE_CODE (rhs
) == SSA_NAME
)
7698 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7699 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7702 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7703 && TREE_CODE (rhs
) != FIXED_CST
)
/* When both operands produced a vector type, their element counts
   (TYPE_VECTOR_SUBPARTS) are compared.  */
7706 if (vectype1
&& vectype2
7707 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
/* Report whichever operand vector type is set, preferring VECTYPE1.  */
7710 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7714 /* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   NOTE(review): this listing is a lossy extract.  Each line still carries
   its original line number, and lines that held no identifier (return type,
   braces, `else', `switch'/`case' labels, most `return' statements, some
   continuation lines and a few declarations) are missing.  The comments
   added below describe only what the surviving lines show.  */
/* NOTE(review): the last parameter line is missing; the body uses SLP_NODE,
   which is presumably declared there -- confirm.  */
7728 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7729 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7732 tree scalar_dest
= NULL_TREE
;
7733 tree vec_dest
= NULL_TREE
;
/* COND_EXPR is the condition operand of STMT; COND_EXPR0/COND_EXPR1 are its
   two operands when it is a comparison.  */
7734 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7735 tree then_clause
, else_clause
;
7736 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7737 tree comp_vectype
= NULL_TREE
;
7738 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7739 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7742 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* DTS[0]/DTS[1] receive the def types of the comparison operands and
   DTS[2]/DTS[3] those of the then/else clauses; they are consumed by
   vect_get_vec_def_for_stmt_copy further down.  */
7743 enum vect_def_type dt
, dts
[4];
/* BITOP1/BITOP2 stay NOP_EXPR unless a boolean comparison is lowered to
   bit operations below.  */
7745 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7746 stmt_vec_info prev_stmt_info
= NULL
;
7748 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Vector defs of: comparison lhs (or mask), comparison rhs, then clause,
   else clause.  */
7749 vec
<tree
> vec_oprnds0
= vNULL
;
7750 vec
<tree
> vec_oprnds1
= vNULL
;
7751 vec
<tree
> vec_oprnds2
= vNULL
;
7752 vec
<tree
> vec_oprnds3
= vNULL
;
/* Set once COND_EXPR is known not to be a comparison.  */
7754 bool masked
= false;
/* Early applicability tests on STMT_INFO.  NOTE(review): the statements
   guarded by most of these tests, and the tail of the def-type test, are
   missing from the extract.  */
7756 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7759 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7761 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7764 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7765 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7769 /* FORNOW: not yet supported. */
7770 if (STMT_VINFO_LIVE_P (stmt_info
))
7772 if (dump_enabled_p ())
7773 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7774 "value used after loop.\n");
7779 /* Is vectorizable conditional operation? */
7780 if (!is_gimple_assign (stmt
))
7783 code
= gimple_assign_rhs_code (stmt
);
7785 if (code
!= COND_EXPR
)
7788 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7789 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7790 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Number of vector stmts to generate: the loop vectorization factor divided
   by the element count of VECTYPE.  NOTE(review): the condition selecting
   this assignment and its alternative are not visible.  */
7795 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7797 gcc_assert (ncopies
>= 1);
7798 if (reduc_index
&& ncopies
> 1)
7799 return false; /* FORNOW */
/* Pick apart the three operands of the COND_EXPR assignment.  */
7801 cond_expr
= gimple_assign_rhs1 (stmt
);
7802 then_clause
= gimple_assign_rhs2 (stmt
);
7803 else_clause
= gimple_assign_rhs3 (stmt
);
/* Analyze the condition and both clauses.  NOTE(review): the continuation
   lines of these calls (presumably passing &VECTYPE1 / &VECTYPE2) and the
   guarded statements are missing.  */
7805 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7810 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7813 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
/* A clause vector type, when set, is tested for needing a real conversion
   to VECTYPE.  */
7817 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7820 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
/* A condition that is not a comparison is treated as a ready-made mask.  */
7823 masked
= !COMPARISON_CLASS_P (cond_expr
);
7824 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7826 if (vec_cmp_type
== NULL_TREE
)
7829 cond_code
= TREE_CODE (cond_expr
);
7832 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
7833 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
7836 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7838 /* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
   BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
/* NOTE(review): the `switch' and the `case' labels that choose among the
   six assignment groups below are not visible in the extract.  Two of the
   groups also swap COND_EXPR0 and COND_EXPR1.  */
7847 bitop1
= BIT_NOT_EXPR
;
7848 bitop2
= BIT_AND_EXPR
;
7851 bitop1
= BIT_NOT_EXPR
;
7852 bitop2
= BIT_IOR_EXPR
;
7855 bitop1
= BIT_NOT_EXPR
;
7856 bitop2
= BIT_AND_EXPR
;
7857 std::swap (cond_expr0
, cond_expr1
);
7860 bitop1
= BIT_NOT_EXPR
;
7861 bitop2
= BIT_IOR_EXPR
;
7862 std::swap (cond_expr0
, cond_expr1
);
7865 bitop1
= BIT_XOR_EXPR
;
7868 bitop1
= BIT_XOR_EXPR
;
7869 bitop2
= BIT_NOT_EXPR
;
7874 cond_code
= SSA_NAME
;
/* Record the stmt kind and check target support: each bit operation in use
   needs an optab handler for COMP_VECTYPE's mode, then
   expand_vec_cond_expr_p is consulted.  NOTE(review): this looks like the
   analysis-only path (presumably guarded by a dropped `if (!vec_stmt)') --
   confirm.  */
7879 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7880 if (bitop1
!= NOP_EXPR
)
7882 machine_mode mode
= TYPE_MODE (comp_vectype
);
7885 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
7886 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7889 if (bitop2
!= NOP_EXPR
)
7891 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
7893 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7897 return expand_vec_cond_expr_p (vectype
, comp_vectype
,
/* Transformation starts here: give each operand vector room for one
   element.  */
7905 vec_oprnds0
.create (1);
7906 vec_oprnds1
.create (1);
7907 vec_oprnds2
.create (1);
7908 vec_oprnds3
.create (1);
7912 scalar_dest
= gimple_assign_lhs (stmt
);
7913 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7915 /* Handle cond expr. */
7916 for (j
= 0; j
< ncopies
; j
++)
7918 gassign
*new_stmt
= NULL
;
/* SLP operand collection: the scalar operands are pushed onto OPS (either
   COND_EXPR alone or COND_EXPR0 and COND_EXPR1, followed by both clauses)
   and vect_get_slp_defs fills VEC_DEFS, which is popped in reverse order.
   NOTE(review): the conditions selecting between the pushes are missing.  */
7923 auto_vec
<tree
, 4> ops
;
7924 auto_vec
<vec
<tree
>, 4> vec_defs
;
7927 ops
.safe_push (cond_expr
);
7930 ops
.safe_push (cond_expr0
);
7931 ops
.safe_push (cond_expr1
);
7933 ops
.safe_push (then_clause
);
7934 ops
.safe_push (else_clause
);
7935 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7936 vec_oprnds3
= vec_defs
.pop ();
7937 vec_oprnds2
= vec_defs
.pop ();
7939 vec_oprnds1
= vec_defs
.pop ();
7940 vec_oprnds0
= vec_defs
.pop ();
/* Non-SLP operand collection: vector defs come from
   vect_get_vec_def_for_operand, and vect_is_simple_use records each
   operand's def type in DTS.  NOTE(review): the assignment targets and the
   guards of this section are on dropped lines.  */
7948 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7950 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7956 = vect_get_vec_def_for_operand (cond_expr0
,
7957 stmt
, comp_vectype
);
/* NOTE(review): `>emp' in the calls below looks like an HTML-entity
   mangling of `&gtemp' -- confirm against the pristine file.  */
7958 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
7961 = vect_get_vec_def_for_operand (cond_expr1
,
7962 stmt
, comp_vectype
);
7963 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
/* REDUC_INDEX 1 substitutes REDUC_DEF for the then clause, REDUC_INDEX 2
   for the else clause; the other assignments fetch the clause's own
   vector def.  */
7965 if (reduc_index
== 1)
7966 vec_then_clause
= reduc_def
;
7969 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7971 vect_is_simple_use (then_clause
, loop_vinfo
,
7974 if (reduc_index
== 2)
7975 vec_else_clause
= reduc_def
;
7978 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7980 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
/* Each operand's def is derived from the one popped off its vector, via
   vect_get_vec_def_for_stmt_copy and the recorded def type.
   NOTE(review): presumably the later-copy branch -- guard not visible.  */
7987 = vect_get_vec_def_for_stmt_copy (dts
[0],
7988 vec_oprnds0
.pop ());
7991 = vect_get_vec_def_for_stmt_copy (dts
[1],
7992 vec_oprnds1
.pop ());
7994 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7995 vec_oprnds2
.pop ());
7996 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7997 vec_oprnds3
.pop ());
/* Store the defs just computed back into the operand vectors.  */
8002 vec_oprnds0
.quick_push (vec_cond_lhs
);
8004 vec_oprnds1
.quick_push (vec_cond_rhs
);
8005 vec_oprnds2
.quick_push (vec_then_clause
);
8006 vec_oprnds3
.quick_push (vec_else_clause
);
8009 /* Arguments are ready. Create the new vector stmt. */
8010 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8012 vec_then_clause
= vec_oprnds2
[i
];
8013 vec_else_clause
= vec_oprnds3
[i
];
/* Build the selector VEC_COMPARE.  One assignment uses VEC_COND_LHS as is;
   the other paths build a COND_CODE comparison tree when no bit operation
   is in use, or emit the BITOP1/BITOP2 statements.  NOTE(review): the
   guards between these alternatives are on dropped lines.  */
8016 vec_compare
= vec_cond_lhs
;
8019 vec_cond_rhs
= vec_oprnds1
[i
];
8020 if (bitop1
== NOP_EXPR
)
8021 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8022 vec_cond_lhs
, vec_cond_rhs
);
8025 new_temp
= make_ssa_name (vec_cmp_type
);
8026 if (bitop1
== BIT_NOT_EXPR
)
8027 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8031 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8033 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8034 if (bitop2
== NOP_EXPR
)
8035 vec_compare
= new_temp
;
8036 else if (bitop2
== BIT_NOT_EXPR
)
8038 /* Instead of doing ~x ? y : z do x ? z : y. */
8039 vec_compare
= new_temp
;
8040 std::swap (vec_then_clause
, vec_else_clause
);
8044 vec_compare
= make_ssa_name (vec_cmp_type
);
8046 = gimple_build_assign (vec_compare
, bitop2
,
8047 vec_cond_lhs
, new_temp
);
8048 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Emit the VEC_COND_EXPR itself into a fresh SSA name of VEC_DEST.  */
8052 new_temp
= make_ssa_name (vec_dest
);
8053 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8054 vec_compare
, vec_then_clause
,
8056 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Record the result: in the SLP node's vector stmts, or as STMT_INFO's
   vector stmt / the previous copy's related stmt.  NOTE(review): the
   guards choosing between these are missing.  */
8058 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8065 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8067 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8069 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the operand vectors.  */
8072 vec_oprnds0
.release ();
8073 vec_oprnds1
.release ();
8074 vec_oprnds2
.release ();
8075 vec_oprnds3
.release ();
8080 /* vectorizable_comparison.
8082 Check if STMT is a comparison expression that can be vectorized.
8083 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8084 comparison, put it in VEC_STMT, and insert it at GSI.
8086 Return FALSE if not a vectorizable STMT, TRUE otherwise.
Comparisons whose operand vector type is a boolean vector are emitted as
bit operations (BITOP1 and BITOP2) instead of a comparison code; all other
comparisons are emitted with the original comparison code. */
8089 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8090 gimple
**vec_stmt
, tree reduc_def
,
8093 tree lhs
, rhs1
, rhs2
;
8094 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8095 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8096 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8097 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8099 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8100 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8103 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8104 stmt_vec_info prev_stmt_info
= NULL
;
8106 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8107 vec
<tree
> vec_oprnds0
= vNULL
;
8108 vec
<tree
> vec_oprnds1
= vNULL
;
8113 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
/* The statement's own vector type must exist and be a boolean vector type;
it is used as MASK_TYPE, the type of the comparison result. */
8116 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8119 mask_type
= vectype
;
8120 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Number of vector statements to generate per scalar statement: the
vectorization factor divided by NUNITS. */
8125 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
8127 gcc_assert (ncopies
>= 1);
8128 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8129 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8133 if (STMT_VINFO_LIVE_P (stmt_info
))
8135 if (dump_enabled_p ())
8136 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8137 "value used after loop.\n");
8141 if (!is_gimple_assign (stmt
))
8144 code
= gimple_assign_rhs_code (stmt
);
8146 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8149 rhs1
= gimple_assign_rhs1 (stmt
);
8150 rhs2
= gimple_assign_rhs2 (stmt
);
/* Classify both operands: DTS[0]/DTS[1] receive the definition types and
VECTYPE1/VECTYPE2 the operands' vector types (NULL_TREE for constant,
external or uninitialized defs). */
8152 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8153 &dts
[0], &vectype1
))
8156 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8157 &dts
[1], &vectype2
))
8160 if (vectype1
&& vectype2
8161 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
/* From here on VECTYPE is the vector type of the compared operands, not of
the result (which stays in MASK_TYPE). */
8164 vectype
= vectype1
? vectype1
: vectype2
;
8166 /* Invariant comparison. */
8169 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8170 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8173 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8176 /* Can't compare mask and non-mask types. */
8177 if (vectype1
&& vectype2
8178 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8181 /* Boolean values may have another representation in vectors
8182 and therefore we prefer bit operations over comparison for
8183 them (which also works for scalar masks). We store opcodes
8184 to use in bitop1 and bitop2. Statement is vectorized as
8185 BITOP2 (rhs1 BITOP1 rhs2) or
8186 rhs1 BITOP2 (BITOP1 rhs2)
8187 depending on bitop1 and bitop2 arity. */
8188 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
/* GT: rhs1 & ~rhs2. */
8190 if (code
== GT_EXPR
)
8192 bitop1
= BIT_NOT_EXPR
;
8193 bitop2
= BIT_AND_EXPR
;
/* GE: rhs1 | ~rhs2. */
8195 else if (code
== GE_EXPR
)
8197 bitop1
= BIT_NOT_EXPR
;
8198 bitop2
= BIT_IOR_EXPR
;
/* LT and LE reuse the GT and GE forms with the operands (and their
definition types) swapped. */
8200 else if (code
== LT_EXPR
)
8202 bitop1
= BIT_NOT_EXPR
;
8203 bitop2
= BIT_AND_EXPR
;
8204 std::swap (rhs1
, rhs2
);
8205 std::swap (dts
[0], dts
[1]);
8207 else if (code
== LE_EXPR
)
8209 bitop1
= BIT_NOT_EXPR
;
8210 bitop2
= BIT_IOR_EXPR
;
8211 std::swap (rhs1
, rhs2
);
8212 std::swap (dts
[0], dts
[1]);
/* Remaining codes use XOR; EQ additionally negates the XOR result. */
8216 bitop1
= BIT_XOR_EXPR
;
8217 if (code
== EQ_EXPR
)
8218 bitop2
= BIT_NOT_EXPR
;
/* Record this statement as a vector comparison and account for its cost;
one extra statement per copy is counted when BITOP2 is in use. */
8224 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
8225 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
8227 if (bitop1
== NOP_EXPR
)
8228 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
/* Bit-operation form: BITOP1 and, when used, BITOP2 are each looked up as
an optab and checked for a handler in VECTYPE's mode. */
8231 machine_mode mode
= TYPE_MODE (vectype
);
8234 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
8235 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8238 if (bitop2
!= NOP_EXPR
)
8240 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8241 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8251 vec_oprnds0
.create (1);
8252 vec_oprnds1
.create (1);
/* MASK is the destination variable, of type MASK_TYPE, from which the SSA
names of the generated statements are made. */
8256 lhs
= gimple_assign_lhs (stmt
);
8257 mask
= vect_create_destination_var (lhs
, mask_type
);
8259 /* Handle cmp expr. */
8260 for (j
= 0; j
< ncopies
; j
++)
8262 gassign
*new_stmt
= NULL
;
/* Vector defs for both operands obtained from the SLP node. OPS holds
rhs1 then rhs2, so the defs are popped in reverse order. */
8267 auto_vec
<tree
, 2> ops
;
8268 auto_vec
<vec
<tree
>, 2> vec_defs
;
8270 ops
.safe_push (rhs1
);
8271 ops
.safe_push (rhs2
);
8272 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
8273 vec_oprnds1
= vec_defs
.pop ();
8274 vec_oprnds0
= vec_defs
.pop ();
8278 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8279 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
/* Defs for a statement copy are derived from the defs popped off
VEC_OPRNDS0 and VEC_OPRNDS1. */
8284 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8285 vec_oprnds0
.pop ());
8286 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8287 vec_oprnds1
.pop ());
8292 vec_oprnds0
.quick_push (vec_rhs1
);
8293 vec_oprnds1
.quick_push (vec_rhs2
);
8296 /* Arguments are ready. Create the new vector stmt. */
8297 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8299 vec_rhs2
= vec_oprnds1
[i
];
8301 new_temp
= make_ssa_name (mask
);
/* No bit operation selected: emit the comparison code itself. */
8302 if (bitop1
== NOP_EXPR
)
8304 new_stmt
= gimple_build_assign (new_temp
, code
,
8305 vec_rhs1
, vec_rhs2
);
8306 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* BITOP1 is unary when it is BIT_NOT_EXPR (applied to VEC_RHS2); otherwise
it is built with VEC_RHS1 as its first operand. */
8310 if (bitop1
== BIT_NOT_EXPR
)
8311 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8313 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8315 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Apply BITOP2 when present: BIT_NOT_EXPR negates the BITOP1 result,
any other code is built with VEC_RHS1 as its first operand. */
8316 if (bitop2
!= NOP_EXPR
)
8318 tree res
= make_ssa_name (mask
);
8319 if (bitop2
== BIT_NOT_EXPR
)
8320 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8322 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8324 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8328 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Record the generated statement: on STMT_INFO and *VEC_STMT, or chained
to the previous copy through STMT_VINFO_RELATED_STMT. */
8335 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8337 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8339 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8342 vec_oprnds0
.release ();
8343 vec_oprnds1
.release ();
8348 /* Make sure the statement is vectorizable.
STMT is the statement to analyze. *NEED_TO_VECTORIZE is set to true when
the analyzed statement is relevant. NODE is the SLP node, if any, and is
passed on to the individual vectorizable_* checks. */
8351 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
8353 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8354 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8355 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8357 tree scalar_type
, vectype
;
8358 gimple
*pattern_stmt
;
8359 gimple_seq pattern_def_seq
;
8361 if (dump_enabled_p ())
8363 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8364 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8367 if (gimple_has_volatile_ops (stmt
))
8369 if (dump_enabled_p ())
8370 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8371 "not vectorized: stmt has volatile operands\n");
8376 /* Skip stmts that do not need to be vectorized. In loops this is expected
to include:
8378 - the COND_EXPR which is the loop exit condition
8379 - any LABEL_EXPRs in the loop
8380 - computations that are used only for array indexing or loop control.
8381 In basic blocks we only analyze statements that are a part of some SLP
8382 instance, therefore, all the statements are relevant.
8384 Pattern statement needs to be analyzed instead of the original statement
8385 if the original statement is not relevant. Otherwise, we analyze both
8386 statements. In basic blocks we are called from some SLP instance
8387 traversal, don't analyze pattern stmts instead, the pattern stmts
8388 already will be part of SLP instance. */
8390 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8391 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8392 && !STMT_VINFO_LIVE_P (stmt_info
))
8394 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8396 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8397 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8399 /* Analyze PATTERN_STMT instead of the original stmt. */
8400 stmt
= pattern_stmt
;
8401 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8402 if (dump_enabled_p ())
8404 dump_printf_loc (MSG_NOTE
, vect_location
,
8405 "==> examining pattern statement: ");
8406 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8411 if (dump_enabled_p ())
8412 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8417 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8420 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8421 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8423 /* Analyze PATTERN_STMT too. */
8424 if (dump_enabled_p ())
8426 dump_printf_loc (MSG_NOTE
, vect_location
,
8427 "==> examining pattern statement: ");
8428 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8431 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
/* For a pattern statement, also analyze every relevant or live statement
of its pattern definition sequence. */
8435 if (is_pattern_stmt_p (stmt_info
)
8437 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8439 gimple_stmt_iterator si
;
8441 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8443 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8444 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8445 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8447 /* Analyze def stmt of STMT if it's a pattern stmt. */
8448 if (dump_enabled_p ())
8450 dump_printf_loc (MSG_NOTE
, vect_location
,
8451 "==> examining pattern def statement: ");
8452 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8455 if (!vect_analyze_stmt (pattern_def_stmt
,
8456 need_to_vectorize
, node
))
/* Dispatch on the definition type. For reduction and nested-cycle
definitions the assertion requires loop (not basic-block) vectorization
and one of the listed relevance values. */
8462 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8464 case vect_internal_def
:
8467 case vect_reduction_def
:
8468 case vect_nested_cycle
:
8469 gcc_assert (!bb_vinfo
8470 && (relevance
== vect_used_in_outer
8471 || relevance
== vect_used_in_outer_by_reduction
8472 || relevance
== vect_used_by_reduction
8473 || relevance
== vect_unused_in_scope
8474 || relevance
== vect_used_only_live
));
8477 case vect_induction_def
:
8478 case vect_constant_def
:
8479 case vect_external_def
:
8480 case vect_unknown_def_type
:
/* Derive the vector type from the scalar type of STMT's LHS and record it
on STMT_INFO; the assertion shows this path is for pure-SLP statements. */
8487 gcc_assert (PURE_SLP_STMT (stmt_info
));
8489 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8490 if (dump_enabled_p ())
8492 dump_printf_loc (MSG_NOTE
, vect_location
,
8493 "get vectype for scalar type: ");
8494 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8495 dump_printf (MSG_NOTE
, "\n");
8498 vectype
= get_vectype_for_scalar_type (scalar_type
);
8501 if (dump_enabled_p ())
8503 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8504 "not SLPed: unsupported data-type ");
8505 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8507 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8512 if (dump_enabled_p ())
8514 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8515 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8516 dump_printf (MSG_NOTE
, "\n");
8519 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
/* A relevant statement must still have a scalar (non-vector-mode) type and
a vector type assigned, unless it is a call without an LHS. */
8522 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8524 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8525 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8526 || (is_gimple_call (stmt
)
8527 && gimple_call_lhs (stmt
) == NULL_TREE
));
8528 *need_to_vectorize
= true;
8531 if (PURE_SLP_STMT (stmt_info
) && !node
)
8533 dump_printf_loc (MSG_NOTE
, vect_location
,
8534 "handled only by SLP analysis\n");
8540 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8541 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
/* Try each vectorizable_* check in analysis mode (null GSI and VEC_STMT);
the || chain stops at the first one that accepts the statement. */
8542 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8543 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8544 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8545 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8546 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8547 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8548 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8549 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8550 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8551 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8552 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
/* Same chain of checks, but without vectorizable_reduction. */
8556 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8557 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8558 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8559 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8560 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8561 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8562 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8563 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8564 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8565 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8570 if (dump_enabled_p ())
8572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8573 "not vectorized: relevant stmt not ");
8574 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8575 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8584 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8585 need extra handling, except for vectorizable reductions. */
8586 if (STMT_VINFO_LIVE_P (stmt_info
)
8587 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8588 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
, -1, NULL
);
8592 if (dump_enabled_p ())
8594 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8595 "not vectorized: live stmt not ");
8596 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8597 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8607 /* Function vect_transform_stmt.
8609 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8612 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8613 bool *grouped_store
, slp_tree slp_node
,
8614 slp_instance slp_node_instance
)
8616 bool is_store
= false;
8617 gimple
*vec_stmt
= NULL
;
8618 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8621 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8622 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8624 switch (STMT_VINFO_TYPE (stmt_info
))
8626 case type_demotion_vec_info_type
:
8627 case type_promotion_vec_info_type
:
8628 case type_conversion_vec_info_type
:
8629 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8633 case induc_vec_info_type
:
8634 gcc_assert (!slp_node
);
8635 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8639 case shift_vec_info_type
:
8640 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8644 case op_vec_info_type
:
8645 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8649 case assignment_vec_info_type
:
8650 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8654 case load_vec_info_type
:
8655 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8660 case store_vec_info_type
:
8661 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8665 /* In case of interleaving, the whole chain is vectorized when the
8666 last store in the chain is reached. Store stmts before the last
8667 one are skipped, and their stmt_vec_info shouldn't be freed meanwhile. */
8669 *grouped_store
= true;
8670 if (STMT_VINFO_VEC_STMT (stmt_info
))
8677 case condition_vec_info_type
:
8678 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8682 case comparison_vec_info_type
:
8683 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8687 case call_vec_info_type
:
8688 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8689 stmt
= gsi_stmt (*gsi
);
8690 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
8694 case call_simd_clone_vec_info_type
:
8695 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8696 stmt
= gsi_stmt (*gsi
);
8699 case reduc_vec_info_type
:
8700 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8705 if (!STMT_VINFO_LIVE_P (stmt_info
))
8707 if (dump_enabled_p ())
8708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8709 "stmt not supported.\n");
8714 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8715 This would break hybrid SLP vectorization. */
8717 gcc_assert (!vec_stmt
8718 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8720 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8721 is being vectorized, but outside the immediately enclosing loop. */
8723 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8724 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8725 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8726 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8727 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8728 || STMT_VINFO_RELEVANT (stmt_info
) ==
8729 vect_used_in_outer_by_reduction
))
8731 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8732 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8733 imm_use_iterator imm_iter
;
8734 use_operand_p use_p
;
8738 if (dump_enabled_p ())
8739 dump_printf_loc (MSG_NOTE
, vect_location
,
8740 "Record the vdef for outer-loop vectorization.\n");
8742 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8743 (to be used when vectorizing outer-loop stmts that use the DEF of STMT). */
8745 if (gimple_code (stmt
) == GIMPLE_PHI
)
8746 scalar_dest
= PHI_RESULT (stmt
);
8748 scalar_dest
= gimple_assign_lhs (stmt
);
8750 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8752 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8754 exit_phi
= USE_STMT (use_p
);
8755 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8760 /* Handle stmts whose DEF is used outside the loop-nest that is
8761 being vectorized. */
8766 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8768 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8769 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8770 && STMT_VINFO_TYPE (slp_stmt_info
) != reduc_vec_info_type
)
8772 done
= vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8778 else if (STMT_VINFO_LIVE_P (stmt_info
)
8779 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8781 done
= vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, &vec_stmt
);
8786 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8792 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info. */
8796 vect_remove_stores (gimple
*first_stmt
)
8798 gimple
*next
= first_stmt
;
8800 gimple_stmt_iterator next_si
;
8804 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8806 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8807 if (is_pattern_stmt_p (stmt_info
))
8808 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8809 /* Free the attached stmt_vec_info and remove the stmt. */
8810 next_si
= gsi_for_stmt (next
);
8811 unlink_stmt_vdef (next
);
8812 gsi_remove (&next_si
, true);
8813 release_defs (next
);
8814 free_stmt_vec_info (next
);
8820 /* Function new_stmt_vec_info.
8822 Create and initialize a new stmt_vec_info struct for STMT.
The struct is zero-allocated with xcalloc and then every field is set
explicitly: STMT is recorded, the statement starts out as unused in scope,
not live, vectorizable, not part of a pattern, and not part of any group.
VINFO is supplied by the caller; presumably the loop/bb info STMT belongs
to (TODO confirm against the full function). */
8825 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8828 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8830 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8831 STMT_VINFO_STMT (res
) = stmt
;
8833 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8834 STMT_VINFO_LIVE_P (res
) = false;
8835 STMT_VINFO_VECTYPE (res
) = NULL
;
8836 STMT_VINFO_VEC_STMT (res
) = NULL
;
8837 STMT_VINFO_VECTORIZABLE (res
) = true;
8838 STMT_VINFO_IN_PATTERN_P (res
) = false;
8839 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8840 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8841 STMT_VINFO_DATA_REF (res
) = NULL
;
8842 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8843 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
/* No data-reference information is known yet. */
8845 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8846 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8847 STMT_VINFO_DR_INIT (res
) = NULL
;
8848 STMT_VINFO_DR_STEP (res
) = NULL
;
8849 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
/* A PHI in a loop-header block gets the unknown def type; the other
assignment below uses vect_internal_def. */
8851 if (gimple_code (stmt
) == GIMPLE_PHI
8852 && is_loop_header_bb_p (gimple_bb (stmt
)))
8853 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8855 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8857 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8858 STMT_SLP_TYPE (res
) = loop_vect
;
8859 STMT_VINFO_NUM_SLP_USES (res
) = 0;
/* Interleaving-group fields: the statement starts outside any group. */
8861 GROUP_FIRST_ELEMENT (res
) = NULL
;
8862 GROUP_NEXT_ELEMENT (res
) = NULL
;
8863 GROUP_SIZE (res
) = 0;
8864 GROUP_STORE_COUNT (res
) = 0;
8865 GROUP_GAP (res
) = 0;
8866 GROUP_SAME_DR_STMT (res
) = NULL
;
8872 /* Create the vector that holds the stmt_vec_info records
(stmt_vec_info_vec). The vector must not exist yet (asserted); it is
created with a size argument of 50. */
8875 init_stmt_vec_info_vec (void)
8877 gcc_assert (!stmt_vec_info_vec
.exists ());
8878 stmt_vec_info_vec
.create (50);
8882 /* Free the stmt_vec_info vector (stmt_vec_info_vec): walk its elements,
free the stmt_vec_info of each element's statement, then release the vector
itself, which is asserted to exist. */
8885 free_stmt_vec_info_vec (void)
8889 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8891 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8892 gcc_assert (stmt_vec_info_vec
.exists ());
8893 stmt_vec_info_vec
.release ();
8897 /* Free stmt vectorization related info. */
8900 free_stmt_vec_info (gimple
*stmt
)
8902 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8907 /* Check if this statement has a related "pattern stmt"
8908 (introduced by the vectorizer during the pattern recognition
8909 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info too. */
8911 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8913 stmt_vec_info patt_info
8914 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8917 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8918 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8919 gimple_set_bb (patt_stmt
, NULL
);
8920 tree lhs
= gimple_get_lhs (patt_stmt
);
8921 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8922 release_ssa_name (lhs
);
8925 gimple_stmt_iterator si
;
8926 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8928 gimple
*seq_stmt
= gsi_stmt (si
);
8929 gimple_set_bb (seq_stmt
, NULL
);
8930 lhs
= gimple_get_lhs (seq_stmt
);
8931 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8932 release_ssa_name (lhs
);
8933 free_stmt_vec_info (seq_stmt
);
8936 free_stmt_vec_info (patt_stmt
);
8940 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8941 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8942 set_vinfo_for_stmt (stmt
, NULL
);
8947 /* Function get_vectype_for_scalar_type_and_size.
8949 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target. */
8953 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8955 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8956 machine_mode simd_mode
;
8957 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8964 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8965 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8968 /* For vector types of elements whose mode precision doesn't
8969 match their type's precision we use an element type of mode
8970 precision. The vectorization routines will have to make sure
8971 they support the proper result truncation/extension.
8972 We also make sure to build vector types with INTEGER_TYPE
8973 component type only. */
8974 if (INTEGRAL_TYPE_P (scalar_type
)
8975 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8976 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8977 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8978 TYPE_UNSIGNED (scalar_type
));
8980 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8981 When the component mode passes the above test simply use a type
8982 corresponding to that mode. The theory is that any use that
8983 would cause problems with this will disable vectorization anyway. */
8984 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8985 && !INTEGRAL_TYPE_P (scalar_type
))
8986 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8988 /* We can't build a vector type of elements with alignment bigger than their size. */
8990 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8991 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8992 TYPE_UNSIGNED (scalar_type
));
8994 /* If we fell back to using the mode, fail if there was
8995 no scalar type for it. */
8996 if (scalar_type
== NULL_TREE
)
8999 /* If no size was supplied use the mode the target prefers. Otherwise
9000 lookup a vector mode of the specified size. */
9002 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9004 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
9005 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
9009 vectype
= build_vector_type (scalar_type
, nunits
);
9011 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9012 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9018 unsigned int current_vector_size
;
9020 /* Function get_vectype_for_scalar_type.
9022 Returns the vector type corresponding to SCALAR_TYPE as supported by the target. */
9026 get_vectype_for_scalar_type (tree scalar_type
)
9029 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9030 current_vector_size
);
9032 && current_vector_size
== 0)
9033 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9037 /* Function get_mask_type_for_scalar_type.
9039 Returns the mask type corresponding to a result of comparison
9040 of vectors of specified SCALAR_TYPE as supported by target. */
9043 get_mask_type_for_scalar_type (tree scalar_type
)
9045 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
/* The mask is a truth vector with as many elements as VECTYPE, built for
the current vector size. */
9050 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9051 current_vector_size
);
9054 /* Function get_same_sized_vectype
9056 Returns a vector type corresponding to SCALAR_TYPE of size
9057 VECTOR_TYPE if supported by the target. */
9060 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
/* Boolean scalar types map to a truth vector type of the same size as
VECTOR_TYPE. */
9062 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9063 return build_same_sized_truth_vector_type (vector_type
);
/* Otherwise look up a vector of SCALAR_TYPE whose size is the byte size of
VECTOR_TYPE's mode. */
9065 return get_vectype_for_scalar_type_and_size
9066 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9069 /* Function vect_is_simple_use.
Input:
9072 VINFO - the vect info of the loop or basic block that is being vectorized.
9073 OPERAND - operand in the loop or bb.
Output:
9075 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9076 DT - the type of definition
9078 Returns whether a stmt with OPERAND can be vectorized.
9079 For loops, supportable operands are constants, loop invariants, and operands
9080 that are defined by the current iteration of the loop. Unsupportable
9081 operands are those that are defined by a previous iteration of the loop (as
9082 is the case in reduction/induction computations).
9083 For basic blocks, supportable operands are constants and bb invariants.
9084 For now, operands defined outside the basic block are not supported. */
9087 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9088 gimple
**def_stmt
, enum vect_def_type
*dt
)
/* Start from "unknown"; the checks below refine *DT. */
9091 *dt
= vect_unknown_def_type
;
9093 if (dump_enabled_p ())
9095 dump_printf_loc (MSG_NOTE
, vect_location
,
9096 "vect_is_simple_use: operand ");
9097 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9098 dump_printf (MSG_NOTE
, "\n");
/* Constants are classified as constant defs. */
9101 if (CONSTANT_CLASS_P (operand
))
9103 *dt
= vect_constant_def
;
/* Other GIMPLE minimal invariants are classified as external defs. */
9107 if (is_gimple_min_invariant (operand
))
9109 *dt
= vect_external_def
;
9113 if (TREE_CODE (operand
) != SSA_NAME
)
9115 if (dump_enabled_p ())
9116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
/* An SSA default definition is classified as an external def. */
9121 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9123 *dt
= vect_external_def
;
9127 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9128 if (dump_enabled_p ())
9130 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9131 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
/* A defining statement outside the region being vectorized makes the
operand external; otherwise the def type recorded on the defining
statement's stmt_vec_info is used. */
9134 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9135 *dt
= vect_external_def
;
9138 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9139 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
/* Dump the resulting definition type by name. */
9142 if (dump_enabled_p ())
9144 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9147 case vect_uninitialized_def
:
9148 dump_printf (MSG_NOTE
, "uninitialized\n");
9150 case vect_constant_def
:
9151 dump_printf (MSG_NOTE
, "constant\n");
9153 case vect_external_def
:
9154 dump_printf (MSG_NOTE
, "external\n");
9156 case vect_internal_def
:
9157 dump_printf (MSG_NOTE
, "internal\n");
9159 case vect_induction_def
:
9160 dump_printf (MSG_NOTE
, "induction\n");
9162 case vect_reduction_def
:
9163 dump_printf (MSG_NOTE
, "reduction\n");
9165 case vect_double_reduction_def
:
9166 dump_printf (MSG_NOTE
, "double reduction\n");
9168 case vect_nested_cycle
:
9169 dump_printf (MSG_NOTE
, "nested cycle\n");
9171 case vect_unknown_def_type
:
9172 dump_printf (MSG_NOTE
, "unknown\n");
/* A definition type that is still unknown is reported as an unsupported
pattern. */
9177 if (*dt
== vect_unknown_def_type
)
9179 if (dump_enabled_p ())
9180 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9181 "Unsupported pattern.\n");
9185 switch (gimple_code (*def_stmt
))
9192 if (dump_enabled_p ())
9193 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9194 "unsupported defining stmt:\n");
9201 /* Function vect_is_simple_use.
9203 Same as vect_is_simple_use but also determines the vector operand
9204 type of OPERAND and stores it to *VECTYPE. If the definition of
9205 OPERAND is vect_uninitialized_def, vect_constant_def or
9206 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9207 is responsible for computing the best suited vector type for the scalar operand. */
9211 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9212 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
9214 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
9217 /* Now get a vector type if the def is internal, otherwise supply
9218 NULL_TREE and leave it up to the caller to figure out a proper
9219 type for the use stmt. */
9220 if (*dt
== vect_internal_def
9221 || *dt
== vect_induction_def
9222 || *dt
== vect_reduction_def
9223 || *dt
== vect_double_reduction_def
9224 || *dt
== vect_nested_cycle
)
9226 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
9228 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9229 && !STMT_VINFO_RELEVANT (stmt_info
)
9230 && !STMT_VINFO_LIVE_P (stmt_info
))
9231 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9233 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9234 gcc_assert (*vectype
!= NULL_TREE
);
9236 else if (*dt
== vect_uninitialized_def
9237 || *dt
== vect_constant_def
9238 || *dt
== vect_external_def
)
9239 *vectype
= NULL_TREE
;
9247 /* Function supportable_widening_operation
9249 Check whether an operation represented by the code CODE is a
9250 widening operation that is supported by the target platform in
9251 vector form (i.e., when operating on arguments of type VECTYPE_IN
9252 producing a result of type VECTYPE_OUT).
9254 Widening operations we currently support are NOP (CONVERT), FLOAT
9255 and WIDEN_MULT. This function checks if these operations are supported
9256 by the target platform either directly (via vector tree-codes), or via
target builtins.
Output:
9260 - CODE1 and CODE2 are codes of vector operations to be used when
9261 vectorizing the operation, if available.
9262 - MULTI_STEP_CVT determines the number of required intermediate steps in
9263 case of multi-step conversion (like char->short->int - in that case
9264 MULTI_STEP_CVT will be 1).
9265 - INTERM_TYPES contains the intermediate type required to perform the
9266 widening operation (short in the above example). */
9269 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9270 tree vectype_out
, tree vectype_in
,
9271 enum tree_code
*code1
, enum tree_code
*code2
,
9272 int *multi_step_cvt
,
9273 vec
<tree
> *interm_types
)
9275 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9276 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9277 struct loop
*vect_loop
= NULL
;
9278 machine_mode vec_mode
;
9279 enum insn_code icode1
, icode2
;
9280 optab optab1
, optab2
;
9281 tree vectype
= vectype_in
;
9282 tree wide_vectype
= vectype_out
;
9283 enum tree_code c1
, c2
;
9285 tree prev_type
, intermediate_type
;
9286 machine_mode intermediate_mode
, prev_mode
;
9287 optab optab3
, optab4
;
9289 *multi_step_cvt
= 0;
9291 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9295 case WIDEN_MULT_EXPR
:
9296 /* The result of a vectorized widening operation usually requires
9297 two vectors (because the widened results do not fit into one vector).
9298 The generated vector results would normally be expected to be
9299 generated in the same order as in the original scalar computation,
9300 i.e. if 8 results are generated in each vector iteration, they are
9301 to be organized as follows:
9302 vect1: [res1,res2,res3,res4],
9303 vect2: [res5,res6,res7,res8].
9305 However, in the special case that the result of the widening
9306 operation is used in a reduction computation only, the order doesn't
9307 matter (because when vectorizing a reduction we change the order of
9308 the computation). Some targets can take advantage of this and
9309 generate more efficient code. For example, targets like Altivec,
9310 that support widen_mult using a sequence of {mult_even,mult_odd}
9311 generate the following vectors:
9312 vect1: [res1,res3,res5,res7],
9313 vect2: [res2,res4,res6,res8].
9315 When vectorizing outer-loops, we execute the inner-loop sequentially
9316 (each vectorized inner-loop iteration contributes to VF outer-loop
9317 iterations in parallel). We therefore don't allow to change the
9318 order of the computation in the inner-loop during outer-loop
9320 /* TODO: Another case in which order doesn't *really* matter is when we
9321 widen and then contract again, e.g. (short)((int)x * y >> 8).
9322 Normally, pack_trunc performs an even/odd permute, whereas the
9323 repack from an even/odd expansion would be an interleave, which
9324 would be significantly simpler for e.g. AVX2. */
9325 /* In any case, in order to avoid duplicating the code below, recurse
9326 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9327 are properly set up for the caller. If we fail, we'll continue with
9328 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9330 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9331 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9332 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9333 stmt
, vectype_out
, vectype_in
,
9334 code1
, code2
, multi_step_cvt
,
9337 /* Elements in a vector with vect_used_by_reduction property cannot
9338 be reordered if the use chain with this property does not have the
9339 same operation. One such an example is s += a * b, where elements
9340 in a and b cannot be reordered. Here we check if the vector defined
9341 by STMT is only directly used in the reduction statement. */
9342 tree lhs
= gimple_assign_lhs (stmt
);
9343 use_operand_p dummy
;
9345 stmt_vec_info use_stmt_info
= NULL
;
9346 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9347 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9348 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9351 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9352 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9365 case VEC_WIDEN_MULT_EVEN_EXPR
:
9366 /* Support the recursion induced just above. */
9367 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9368 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9371 case WIDEN_LSHIFT_EXPR
:
9372 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9373 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9377 c1
= VEC_UNPACK_LO_EXPR
;
9378 c2
= VEC_UNPACK_HI_EXPR
;
9382 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9383 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9386 case FIX_TRUNC_EXPR
:
9387 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9388 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9389 computing the operation. */
9396 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9399 if (code
== FIX_TRUNC_EXPR
)
9401 /* The signedness is determined from output operand. */
9402 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9403 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9407 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9408 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9411 if (!optab1
|| !optab2
)
9414 vec_mode
= TYPE_MODE (vectype
);
9415 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9416 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
9422 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9423 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9424 /* For scalar masks we may have different boolean
9425 vector types having the same QImode. Thus we
9426 add additional check for elements number. */
9427 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9428 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9429 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9431 /* Check if it's a multi-step conversion that can be done using intermediate
9434 prev_type
= vectype
;
9435 prev_mode
= vec_mode
;
9437 if (!CONVERT_EXPR_CODE_P (code
))
9440 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9441 intermediate steps in promotion sequence. We try
9442 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9444 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9445 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9447 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9448 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9451 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9452 current_vector_size
);
9453 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9458 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9459 TYPE_UNSIGNED (prev_type
));
9461 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9462 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9464 if (!optab3
|| !optab4
9465 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9466 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9467 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9468 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9469 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9470 == CODE_FOR_nothing
)
9471 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9472 == CODE_FOR_nothing
))
9475 interm_types
->quick_push (intermediate_type
);
9476 (*multi_step_cvt
)++;
9478 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9479 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9480 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9481 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9482 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9484 prev_type
= intermediate_type
;
9485 prev_mode
= intermediate_mode
;
9488 interm_types
->release ();
9493 /* Function supportable_narrowing_operation
9495 Check whether an operation represented by the code CODE is a
9496 narrowing operation that is supported by the target platform in
9497 vector form (i.e., when operating on arguments of type VECTYPE_IN
9498 and producing a result of type VECTYPE_OUT).
9500 Narrowing operations we currently support are NOP (CONVERT) and
9501 FIX_TRUNC. This function checks if these operations are supported by
9502 the target platform directly via vector tree-codes.
9505 - CODE1 is the code of a vector operation to be used when
9506 vectorizing the operation, if available.
9507 - MULTI_STEP_CVT determines the number of required intermediate steps in
9508 case of multi-step conversion (like int->short->char - in that case
9509 MULTI_STEP_CVT will be 1).
9510 - INTERM_TYPES contains the intermediate type required to perform the
9511 narrowing operation (short in the above example). */
9514 supportable_narrowing_operation (enum tree_code code
,
9515 tree vectype_out
, tree vectype_in
,
9516 enum tree_code
*code1
, int *multi_step_cvt
,
9517 vec
<tree
> *interm_types
)
9519 machine_mode vec_mode
;
9520 enum insn_code icode1
;
9521 optab optab1
, interm_optab
;
9522 tree vectype
= vectype_in
;
9523 tree narrow_vectype
= vectype_out
;
9525 tree intermediate_type
, prev_type
;
9526 machine_mode intermediate_mode
, prev_mode
;
9530 *multi_step_cvt
= 0;
9534 c1
= VEC_PACK_TRUNC_EXPR
;
9537 case FIX_TRUNC_EXPR
:
9538 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9542 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9543 tree code and optabs used for computing the operation. */
9550 if (code
== FIX_TRUNC_EXPR
)
9551 /* The signedness is determined from output operand. */
9552 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9554 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9559 vec_mode
= TYPE_MODE (vectype
);
9560 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9565 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9566 /* For scalar masks we may have different boolean
9567 vector types having the same QImode. Thus we
9568 add additional check for elements number. */
9569 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9570 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9571 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9573 /* Check if it's a multi-step conversion that can be done using intermediate
9575 prev_mode
= vec_mode
;
9576 prev_type
= vectype
;
9577 if (code
== FIX_TRUNC_EXPR
)
9578 uns
= TYPE_UNSIGNED (vectype_out
);
9580 uns
= TYPE_UNSIGNED (vectype
);
9582 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9583 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9584 costly than signed. */
9585 if (code
== FIX_TRUNC_EXPR
&& uns
)
9587 enum insn_code icode2
;
9590 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9592 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9593 if (interm_optab
!= unknown_optab
9594 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9595 && insn_data
[icode1
].operand
[0].mode
9596 == insn_data
[icode2
].operand
[0].mode
)
9599 optab1
= interm_optab
;
9604 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9605 intermediate steps in promotion sequence. We try
9606 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9607 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9608 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9610 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9611 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9614 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9615 current_vector_size
);
9616 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9621 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9623 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9626 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9627 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9628 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9629 == CODE_FOR_nothing
))
9632 interm_types
->quick_push (intermediate_type
);
9633 (*multi_step_cvt
)++;
9635 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9636 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9637 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9638 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9640 prev_mode
= intermediate_mode
;
9641 prev_type
= intermediate_type
;
9642 optab1
= interm_optab
;
9645 interm_types
->release ();