1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
63 /* Return the vectorized type for the given statement. */
66 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
68 return STMT_VINFO_VECTYPE (stmt_info
);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
74 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
76 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
77 basic_block bb
= gimple_bb (stmt
);
78 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
84 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
86 return (bb
->loop_father
== loop
->inner
);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
94 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
95 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
96 int misalign
, enum vect_cost_model_location where
)
100 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
101 stmt_info_for_cost si
= { count
, kind
,
102 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
104 body_cost_vec
->safe_push (si
);
106 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
109 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
110 count
, kind
, stmt_info
, misalign
, where
);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
116 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
118 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
128 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
129 tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
154 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
155 tree array
, unsigned HOST_WIDE_INT n
)
160 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
161 build_int_cst (size_type_node
, n
),
162 NULL_TREE
, NULL_TREE
);
164 new_stmt
= gimple_build_assign (array_ref
, vect
);
165 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
173 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
177 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
190 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
191 enum vect_relevant relevant
, bool live_p
)
193 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
194 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
195 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
196 gimple
*pattern_stmt
;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE
, vect_location
,
201 "mark relevant %d, live %d: ", relevant
, live_p
);
202 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE
, vect_location
,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info
= vinfo_for_stmt (pattern_stmt
);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
224 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
225 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
229 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
230 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
231 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
233 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE
, vect_location
,
238 "already marked relevant/live.\n");
242 worklist
->safe_push (stmt
);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
251 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
257 if (!is_gimple_assign (stmt
))
260 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
262 enum vect_def_type dt
= vect_uninitialized_def
;
264 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
268 "use not simple.\n");
272 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - control stmts in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
291 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
292 enum vect_relevant
*relevant
, bool *live_p
)
294 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
296 imm_use_iterator imm_iter
;
300 *relevant
= vect_unused_in_scope
;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt
)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
306 != loop_exit_ctrl_vec_info_type
)
307 *relevant
= vect_used_in_scope
;
309 /* changing memory. */
310 if (gimple_code (stmt
) != GIMPLE_PHI
)
311 if (gimple_vdef (stmt
)
312 && !gimple_clobber_p (stmt
))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE
, vect_location
,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant
= vect_used_in_scope
;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
323 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
325 basic_block bb
= gimple_bb (USE_STMT (use_p
));
326 if (!flow_bb_inside_loop_p (loop
, bb
))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE
, vect_location
,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p
)))
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
338 gcc_assert (bb
== single_exit (loop
)->dest
);
345 if (*live_p
&& *relevant
== vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE
, vect_location
,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant
= vect_used_only_live
;
354 return (*live_p
|| *relevant
);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
364 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
367 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info
))
375 /* STMT has a data_ref. FORNOW this means that its of one of
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt
))
390 if (is_gimple_call (stmt
)
391 && gimple_call_internal_p (stmt
))
392 switch (gimple_call_internal_fn (stmt
))
395 operand
= gimple_call_arg (stmt
, 3);
400 operand
= gimple_call_arg (stmt
, 2);
410 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
412 operand
= gimple_assign_rhs1 (stmt
);
413 if (TREE_CODE (operand
) != SSA_NAME
)
424 Function process_use.
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT cause it had already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
451 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
452 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
455 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
456 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
457 stmt_vec_info dstmt_vinfo
;
458 basic_block bb
, def_bb
;
460 enum vect_def_type dt
;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
467 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
471 "not vectorized: unsupported use in stmt.\n");
475 if (!def_stmt
|| gimple_nop_p (def_stmt
))
478 def_bb
= gimple_bb (def_stmt
);
479 if (!flow_bb_inside_loop_p (loop
, def_bb
))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
492 bb
= gimple_bb (stmt
);
493 if (gimple_code (stmt
) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
495 && gimple_code (def_stmt
) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
497 && bb
->loop_father
== def_bb
->loop_father
)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE
, vect_location
,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
503 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
517 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE
, vect_location
,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
525 case vect_unused_in_scope
:
526 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
527 vect_used_in_scope
: vect_unused_in_scope
;
530 case vect_used_in_outer_by_reduction
:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
532 relevant
= vect_used_by_reduction
;
535 case vect_used_in_outer
:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
537 relevant
= vect_used_in_scope
;
540 case vect_used_in_scope
:
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE
, vect_location
,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
563 case vect_unused_in_scope
:
564 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
566 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
569 case vect_used_by_reduction
:
570 case vect_used_only_live
:
571 relevant
= vect_used_in_outer_by_reduction
;
574 case vect_used_in_scope
:
575 relevant
= vect_used_in_outer
;
583 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
599 Stmt 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
607 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
608 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
609 unsigned int nbbs
= loop
->num_nodes
;
610 gimple_stmt_iterator si
;
613 stmt_vec_info stmt_vinfo
;
617 enum vect_relevant relevant
;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE
, vect_location
,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec
<gimple
*, 64> worklist
;
625 /* 1. Init worklist. */
626 for (i
= 0; i
< nbbs
; i
++)
629 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
638 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
639 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
641 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
643 stmt
= gsi_stmt (si
);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
650 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
651 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
655 /* 2. Process_worklist */
656 while (worklist
.length () > 0)
661 stmt
= worklist
.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
671 stmt_vinfo
= vinfo_for_stmt (stmt
);
672 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
687 case vect_reduction_def
:
688 gcc_assert (relevant
!= vect_unused_in_scope
);
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_scope
691 && relevant
!= vect_used_by_reduction
692 && relevant
!= vect_used_only_live
)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
696 "unsupported use of reduction.\n");
701 case vect_nested_cycle
:
702 if (relevant
!= vect_unused_in_scope
703 && relevant
!= vect_used_in_outer_by_reduction
704 && relevant
!= vect_used_in_outer
)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
708 "unsupported use of nested cycle.\n");
714 case vect_double_reduction_def
:
715 if (relevant
!= vect_unused_in_scope
716 && relevant
!= vect_used_by_reduction
717 && relevant
!= vect_used_only_live
)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
721 "unsupported use of double reduction.\n");
731 if (is_pattern_stmt_p (stmt_vinfo
))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt
))
738 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
739 tree op
= gimple_assign_rhs1 (stmt
);
742 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
744 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
745 relevant
, &worklist
, false)
746 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
747 relevant
, &worklist
, false))
751 for (; i
< gimple_num_ops (stmt
); i
++)
753 op
= gimple_op (stmt
, i
);
754 if (TREE_CODE (op
) == SSA_NAME
755 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
760 else if (is_gimple_call (stmt
))
762 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
764 tree arg
= gimple_call_arg (stmt
, i
);
765 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
772 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
774 tree op
= USE_FROM_PTR (use_p
);
775 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
782 gather_scatter_info gs_info
;
783 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
785 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
789 } /* while worklist */
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
802 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
803 enum vect_def_type
*dt
,
804 stmt_vector_for_cost
*prologue_cost_vec
,
805 stmt_vector_for_cost
*body_cost_vec
)
808 int inside_cost
= 0, prologue_cost
= 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info
))
814 /* FORNOW: Assuming maximum 2 args per stmts. */
815 for (i
= 0; i
< 2; i
++)
816 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
817 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
818 stmt_info
, 0, vect_prologue
);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
822 stmt_info
, 0, vect_body
);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE
, vect_location
,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
838 enum vect_def_type
*dt
, int pwr
)
841 int inside_cost
= 0, prologue_cost
= 0;
842 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
843 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
844 void *target_cost_data
;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info
))
851 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
853 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
855 for (i
= 0; i
< pwr
+ 1; i
++)
857 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
859 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
860 vec_promote_demote
, stmt_info
, 0,
864 /* FORNOW: Assuming maximum 2 args per stmts. */
865 for (i
= 0; i
< 2; i
++)
866 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
867 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
868 stmt_info
, 0, vect_prologue
);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE
, vect_location
,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
882 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
883 vect_memory_access_type memory_access_type
,
884 enum vect_def_type dt
, slp_tree slp_node
,
885 stmt_vector_for_cost
*prologue_cost_vec
,
886 stmt_vector_for_cost
*body_cost_vec
)
888 unsigned int inside_cost
= 0, prologue_cost
= 0;
889 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
890 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
891 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
893 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
894 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
895 stmt_info
, 0, vect_prologue
);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node
&& grouped_access_p
)
901 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
902 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
915 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
917 /* Uses a high and low interleave or shuffle operations for each
919 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
920 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
921 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
922 stmt_info
, 0, vect_body
);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE
, vect_location
,
926 "vect_model_store_cost: strided group_size = %d .\n",
930 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
931 /* Costs of the stores. */
932 if (memory_access_type
== VMAT_ELEMENTWISE
)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost
+= record_stmt_cost (body_cost_vec
,
935 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
936 scalar_store
, stmt_info
, 0, vect_body
);
938 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
940 if (memory_access_type
== VMAT_ELEMENTWISE
941 || memory_access_type
== VMAT_STRIDED_SLP
)
942 inside_cost
+= record_stmt_cost (body_cost_vec
,
943 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
944 vec_to_scalar
, stmt_info
, 0, vect_body
);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE
, vect_location
,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
953 /* Calculate cost of DR's memory access. */
955 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
956 unsigned int *inside_cost
,
957 stmt_vector_for_cost
*body_cost_vec
)
959 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
960 gimple
*stmt
= DR_STMT (dr
);
961 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
963 switch (alignment_support_scheme
)
967 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
968 vector_store
, stmt_info
, 0,
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE
, vect_location
,
973 "vect_model_store_cost: aligned.\n");
977 case dr_unaligned_supported
:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
981 unaligned_store
, stmt_info
,
982 DR_MISALIGNMENT (dr
), vect_body
);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE
, vect_location
,
985 "vect_model_store_cost: unaligned supported by "
990 case dr_unaligned_unsupported
:
992 *inside_cost
= VECT_MAX_COST
;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
996 "vect_model_store_cost: unsupported access.\n");
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1014 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1015 vect_memory_access_type memory_access_type
,
1017 stmt_vector_for_cost
*prologue_cost_vec
,
1018 stmt_vector_for_cost
*body_cost_vec
)
1020 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1021 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1022 unsigned int inside_cost
= 0, prologue_cost
= 0;
1023 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node
&& grouped_access_p
)
1029 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1030 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1043 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1045 /* Uses an even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1048 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1049 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1050 stmt_info
, 0, vect_body
);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE
, vect_location
,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1058 /* The loads themselves. */
1059 if (memory_access_type
== VMAT_ELEMENTWISE
)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1063 inside_cost
+= record_stmt_cost (body_cost_vec
,
1064 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1065 scalar_load
, stmt_info
, 0, vect_body
);
1068 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1069 &inside_cost
, &prologue_cost
,
1070 prologue_cost_vec
, body_cost_vec
, true);
1071 if (memory_access_type
== VMAT_ELEMENTWISE
1072 || memory_access_type
== VMAT_STRIDED_SLP
)
1073 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1074 stmt_info
, 0, vect_body
);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE
, vect_location
,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1083 /* Calculate cost of DR's memory access. */
1085 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1086 bool add_realign_cost
, unsigned int *inside_cost
,
1087 unsigned int *prologue_cost
,
1088 stmt_vector_for_cost
*prologue_cost_vec
,
1089 stmt_vector_for_cost
*body_cost_vec
,
1090 bool record_prologue_costs
)
1092 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1093 gimple
*stmt
= DR_STMT (dr
);
1094 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1096 switch (alignment_support_scheme
)
1100 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1101 stmt_info
, 0, vect_body
);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE
, vect_location
,
1105 "vect_model_load_cost: aligned.\n");
1109 case dr_unaligned_supported
:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1113 unaligned_load
, stmt_info
,
1114 DR_MISALIGNMENT (dr
), vect_body
);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE
, vect_location
,
1118 "vect_model_load_cost: unaligned supported by "
1123 case dr_explicit_realign
:
1125 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1126 vector_load
, stmt_info
, 0, vect_body
);
1127 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1128 vec_perm
, stmt_info
, 0, vect_body
);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1133 if (targetm
.vectorize
.builtin_mask_for_load
)
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1135 stmt_info
, 0, vect_body
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_load_cost: explicit realign\n");
1143 case dr_explicit_realign_optimized
:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_load_cost: unaligned software "
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost
&& record_prologue_costs
)
1159 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1160 vector_stmt
, stmt_info
,
1162 if (targetm
.vectorize
.builtin_mask_for_load
)
1163 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1164 vector_stmt
, stmt_info
,
1168 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1169 stmt_info
, 0, vect_body
);
1170 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1171 stmt_info
, 0, vect_body
);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE
, vect_location
,
1175 "vect_model_load_cost: explicit realign optimized"
1181 case dr_unaligned_unsupported
:
1183 *inside_cost
= VECT_MAX_COST
;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1187 "vect_model_load_cost: unsupported access.\n");
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1200 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1203 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1206 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1207 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1211 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1215 if (nested_in_vect_loop_p (loop
, stmt
))
1218 pe
= loop_preheader_edge (loop
);
1219 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1220 gcc_assert (!new_bb
);
1224 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1226 gimple_stmt_iterator gsi_bb_start
;
1228 gcc_assert (bb_vinfo
);
1229 bb
= BB_VINFO_BB (bb_vinfo
);
1230 gsi_bb_start
= gsi_after_labels (bb
);
1231 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE
, vect_location
,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 vector type a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1254 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1259 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1262 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1263 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1265 /* Scalar boolean value should be transformed into
1266 all zeros or all ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type
))
1269 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1270 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1272 if (CONSTANT_CLASS_P (val
))
1273 val
= integer_zerop (val
) ? false_val
: true_val
;
1276 new_temp
= make_ssa_name (TREE_TYPE (type
));
1277 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1278 val
, true_val
, false_val
);
1279 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1283 else if (CONSTANT_CLASS_P (val
))
1284 val
= fold_convert (TREE_TYPE (type
), val
);
1287 new_temp
= make_ssa_name (TREE_TYPE (type
));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1289 init_stmt
= gimple_build_assign (new_temp
,
1290 fold_build1 (VIEW_CONVERT_EXPR
,
1294 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1295 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1299 val
= build_vector_from_val (type
, val
);
1302 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1303 init_stmt
= gimple_build_assign (new_temp
, val
);
1304 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
1314 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1318 stmt_vec_info def_stmt_info
= NULL
;
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def
:
1324 case vect_external_def
:
1325 /* Code should use vect_get_vec_def_for_operand. */
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def
:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1334 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1335 /* Get vectorized pattern statement. */
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1339 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1341 gcc_assert (vec_stmt
);
1342 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1343 vec_oprnd
= PHI_RESULT (vec_stmt
);
1344 else if (is_gimple_call (vec_stmt
))
1345 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1347 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def
:
1353 case vect_double_reduction_def
:
1354 case vect_nested_cycle
:
1355 /* Code should use get_initial_def_for_reduction. */
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def
:
1361 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1365 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1366 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1367 vec_oprnd
= PHI_RESULT (vec_stmt
);
1369 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
1392 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1395 enum vect_def_type dt
;
1397 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1398 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE
, vect_location
,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1405 dump_printf (MSG_NOTE
, "\n");
1408 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1409 gcc_assert (is_simple_use
);
1410 if (def_stmt
&& dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1416 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1418 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1422 vector_type
= vectype
;
1423 else if (TREE_CODE (TREE_TYPE (op
)) == BOOLEAN_TYPE
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1425 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1427 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1429 gcc_assert (vector_type
);
1430 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1433 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1496 gimple
*vec_stmt_for_operand
;
1497 stmt_vec_info def_stmt_info
;
1499 /* Do nothing; can reuse same def. */
1500 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1503 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1504 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1505 gcc_assert (def_stmt_info
);
1506 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1507 gcc_assert (vec_stmt_for_operand
);
1508 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1509 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1511 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1521 vec
<tree
> *vec_oprnds0
,
1522 vec
<tree
> *vec_oprnds1
)
1524 tree vec_oprnd
= vec_oprnds0
->pop ();
1526 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1527 vec_oprnds0
->quick_push (vec_oprnd
);
1529 if (vec_oprnds1
&& vec_oprnds1
->length ())
1531 vec_oprnd
= vec_oprnds1
->pop ();
1532 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1533 vec_oprnds1
->quick_push (vec_oprnd
);
1538 /* Get vectorized definitions for OP0 and OP1.
1539 REDUC_INDEX is the index of reduction operand in case of reduction,
1540 and -1 otherwise. */
1543 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1544 vec
<tree
> *vec_oprnds0
,
1545 vec
<tree
> *vec_oprnds1
,
1546 slp_tree slp_node
, int reduc_index
)
1550 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1551 auto_vec
<tree
> ops (nops
);
1552 auto_vec
<vec
<tree
> > vec_defs (nops
);
1554 ops
.quick_push (op0
);
1556 ops
.quick_push (op1
);
1558 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1560 *vec_oprnds0
= vec_defs
[0];
1562 *vec_oprnds1
= vec_defs
[1];
1568 vec_oprnds0
->create (1);
1569 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1570 vec_oprnds0
->quick_push (vec_oprnd
);
1574 vec_oprnds1
->create (1);
1575 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1576 vec_oprnds1
->quick_push (vec_oprnd
);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
1587 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1588 gimple_stmt_iterator
*gsi
)
1590 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1591 vec_info
*vinfo
= stmt_info
->vinfo
;
1593 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1595 if (!gsi_end_p (*gsi
)
1596 && gimple_has_mem_ops (vec_stmt
))
1598 gimple
*at_stmt
= gsi_stmt (*gsi
);
1599 tree vuse
= gimple_vuse (at_stmt
);
1600 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1602 tree vdef
= gimple_vdef (at_stmt
);
1603 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1609 && ((is_gimple_assign (vec_stmt
)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1611 || (is_gimple_call (vec_stmt
)
1612 && !(gimple_call_flags (vec_stmt
)
1613 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1615 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1616 gimple_set_vdef (vec_stmt
, new_vdef
);
1617 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1621 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1623 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1631 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1637 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1638 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
1647 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1648 tree vectype_out
, tree vectype_in
)
1651 if (internal_fn_p (cfn
))
1652 ifn
= as_internal_fn (cfn
);
1654 ifn
= associated_internal_fn (fndecl
);
1655 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1657 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1658 if (info
.vectorizable
)
1660 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1661 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1662 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1663 OPTIMIZE_FOR_SPEED
))
1671 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1672 gimple_stmt_iterator
*);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1679 compare_step_with_zero (gimple
*stmt
)
1681 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1682 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1684 if (loop_vinfo
&& nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), stmt
))
1685 step
= STMT_VINFO_DR_STEP (stmt_info
);
1687 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
1688 return tree_int_cst_compare (step
, size_zero_node
);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
1695 perm_mask_for_reverse (tree vectype
)
1700 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1701 sel
= XALLOCAVEC (unsigned char, nunits
);
1703 for (i
= 0; i
< nunits
; ++i
)
1704 sel
[i
] = nunits
- 1 - i
;
1706 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
1708 return vect_gen_perm_mask_checked (vectype
, sel
);
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
1721 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1722 vec_load_store_type vls_type
,
1723 vect_memory_access_type
*memory_access_type
)
1725 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1726 vec_info
*vinfo
= stmt_info
->vinfo
;
1727 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1728 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1729 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1730 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1731 bool single_element_p
= (stmt
== first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info
));
1733 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
1734 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p
= false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p
= (vls_type
== VLS_LOAD
&& loop_vinfo
&& !loop
->inner
);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
1753 if (STMT_VINFO_STRIDED_P (stmt_info
))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits
% group_size
== 0)
1759 *memory_access_type
= VMAT_STRIDED_SLP
;
1761 *memory_access_type
= VMAT_ELEMENTWISE
;
1765 overrun_p
= loop_vinfo
&& gap
!= 0;
1766 if (overrun_p
&& vls_type
!= VLS_LOAD
)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1773 if (overrun_p
&& !can_overrun_p
)
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1777 "Peeling for outer loop is not supported\n");
1780 *memory_access_type
= VMAT_CONTIGUOUS
;
1785 /* We can always handle this case using elementwise accesses,
1786 but see if something more efficient is available. */
1787 *memory_access_type
= VMAT_ELEMENTWISE
;
1789 /* If there is a gap at the end of the group then these optimizations
1790 would access excess elements in the last iteration. */
1791 bool would_overrun_p
= (gap
!= 0);
1792 if (!STMT_VINFO_STRIDED_P (stmt_info
)
1793 && (can_overrun_p
|| !would_overrun_p
)
1794 && compare_step_with_zero (stmt
) > 0)
1796 /* First try using LOAD/STORE_LANES. */
1797 if (vls_type
== VLS_LOAD
1798 ? vect_load_lanes_supported (vectype
, group_size
)
1799 : vect_store_lanes_supported (vectype
, group_size
))
1801 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
1802 overrun_p
= would_overrun_p
;
1805 /* If that fails, try using permuting loads. */
1806 if (*memory_access_type
== VMAT_ELEMENTWISE
1807 && (vls_type
== VLS_LOAD
1808 ? vect_grouped_load_supported (vectype
, single_element_p
,
1810 : vect_grouped_store_supported (vectype
, group_size
)))
1812 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
1813 overrun_p
= would_overrun_p
;
1818 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
1820 /* STMT is the leader of the group. Check the operands of all the
1821 stmts of the group. */
1822 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
1825 gcc_assert (gimple_assign_single_p (next_stmt
));
1826 tree op
= gimple_assign_rhs1 (next_stmt
);
1828 enum vect_def_type dt
;
1829 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1833 "use not simple.\n");
1836 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
1842 gcc_assert (can_overrun_p
);
1843 if (dump_enabled_p ())
1844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1845 "Data access with gaps requires scalar "
1847 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
1853 /* A subroutine of get_load_store_type, with a subset of the same
1854 arguments. Handle the case where STMT is a load or store that
1855 accesses consecutive elements with a negative step. */
1857 static vect_memory_access_type
1858 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
1859 vec_load_store_type vls_type
,
1860 unsigned int ncopies
)
1862 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1863 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1864 dr_alignment_support alignment_support_scheme
;
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1870 "multiple types with negative step.\n");
1871 return VMAT_ELEMENTWISE
;
1874 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1875 if (alignment_support_scheme
!= dr_aligned
1876 && alignment_support_scheme
!= dr_unaligned_supported
)
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1880 "negative step but alignment required.\n");
1881 return VMAT_ELEMENTWISE
;
1884 if (vls_type
== VLS_STORE_INVARIANT
)
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_NOTE
, vect_location
,
1888 "negative step with invariant source;"
1889 " no permute needed.\n");
1890 return VMAT_CONTIGUOUS_DOWN
;
1893 if (!perm_mask_for_reverse (vectype
))
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1897 "negative step and reversing not supported.\n");
1898 return VMAT_ELEMENTWISE
;
1901 return VMAT_CONTIGUOUS_REVERSE
;
1904 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1905 if there is a memory access type that the vectorized form can use,
1906 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1907 or scatters, fill in GS_INFO accordingly.
1909 SLP says whether we're performing SLP rather than loop vectorization.
1910 VECTYPE is the vector type that the vectorized statements will use.
1911 NCOPIES is the number of vector statements that will be needed. */
1914 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
1915 vec_load_store_type vls_type
, unsigned int ncopies
,
1916 vect_memory_access_type
*memory_access_type
,
1917 gather_scatter_info
*gs_info
)
1919 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1920 vec_info
*vinfo
= stmt_info
->vinfo
;
1921 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1922 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1924 *memory_access_type
= VMAT_GATHER_SCATTER
;
1926 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
1928 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
1929 &gs_info
->offset_dt
,
1930 &gs_info
->offset_vectype
))
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1934 "%s index use not simple.\n",
1935 vls_type
== VLS_LOAD
? "gather" : "scatter");
1939 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1941 if (!get_group_load_store_type (stmt
, vectype
, slp
, vls_type
,
1942 memory_access_type
))
1945 else if (STMT_VINFO_STRIDED_P (stmt_info
))
1948 *memory_access_type
= VMAT_ELEMENTWISE
;
1952 int cmp
= compare_step_with_zero (stmt
);
1954 *memory_access_type
= get_negative_load_store_type
1955 (stmt
, vectype
, vls_type
, ncopies
);
1958 gcc_assert (vls_type
== VLS_LOAD
);
1959 *memory_access_type
= VMAT_INVARIANT
;
1962 *memory_access_type
= VMAT_CONTIGUOUS
;
1965 /* FIXME: At the moment the cost model seems to underestimate the
1966 cost of using elementwise accesses. This check preserves the
1967 traditional behavior until that can be fixed. */
1968 if (*memory_access_type
== VMAT_ELEMENTWISE
1969 && !STMT_VINFO_STRIDED_P (stmt_info
))
1971 if (dump_enabled_p ())
1972 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1973 "not falling back to elementwise accesses\n");
1979 /* Function vectorizable_mask_load_store.
1981 Check if STMT performs a conditional load or store that can be vectorized.
1982 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1983 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1984 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1987 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1988 gimple
**vec_stmt
, slp_tree slp_node
)
1990 tree vec_dest
= NULL
;
1991 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1992 stmt_vec_info prev_stmt_info
;
1993 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1994 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1995 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1996 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1997 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1998 tree rhs_vectype
= NULL_TREE
;
2003 tree dataref_ptr
= NULL_TREE
;
2005 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2009 gather_scatter_info gs_info
;
2010 vec_load_store_type vls_type
;
2013 enum vect_def_type dt
;
2015 if (slp_node
!= NULL
)
2018 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2019 gcc_assert (ncopies
>= 1);
2021 mask
= gimple_call_arg (stmt
, 2);
2023 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
2026 /* FORNOW. This restriction should be relaxed. */
2027 if (nested_in_vect_loop
&& ncopies
> 1)
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2031 "multiple types in nested loop.");
2035 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2038 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2042 if (!STMT_VINFO_DATA_REF (stmt_info
))
2045 elem_type
= TREE_TYPE (vectype
);
2047 if (TREE_CODE (mask
) != SSA_NAME
)
2050 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2054 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2056 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2057 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2060 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2062 tree rhs
= gimple_call_arg (stmt
, 3);
2063 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2065 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2066 vls_type
= VLS_STORE_INVARIANT
;
2068 vls_type
= VLS_STORE
;
2071 vls_type
= VLS_LOAD
;
2073 vect_memory_access_type memory_access_type
;
2074 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2075 &memory_access_type
, &gs_info
))
2078 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2080 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2082 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2083 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2085 if (dump_enabled_p ())
2086 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2087 "masked gather with integer mask not supported.");
2091 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2095 "unsupported access type for masked %s.\n",
2096 vls_type
== VLS_LOAD
? "load" : "store");
2099 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2100 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2101 TYPE_MODE (mask_vectype
),
2102 vls_type
== VLS_LOAD
)
2104 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2107 if (!vec_stmt
) /* transformation not required. */
2109 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2110 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2111 if (vls_type
== VLS_LOAD
)
2112 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2115 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2116 dt
, NULL
, NULL
, NULL
);
2119 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2123 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2125 tree vec_oprnd0
= NULL_TREE
, op
;
2126 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2127 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2128 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2129 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2130 tree mask_perm_mask
= NULL_TREE
;
2131 edge pe
= loop_preheader_edge (loop
);
2134 enum { NARROW
, NONE
, WIDEN
} modifier
;
2135 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2137 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2138 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2139 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2140 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2141 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2142 scaletype
= TREE_VALUE (arglist
);
2143 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2144 && types_compatible_p (srctype
, masktype
));
2146 if (nunits
== gather_off_nunits
)
2148 else if (nunits
== gather_off_nunits
/ 2)
2150 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
2153 for (i
= 0; i
< gather_off_nunits
; ++i
)
2154 sel
[i
] = i
| nunits
;
2156 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
2158 else if (nunits
== gather_off_nunits
* 2)
2160 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
2163 for (i
= 0; i
< nunits
; ++i
)
2164 sel
[i
] = i
< gather_off_nunits
2165 ? i
: i
+ nunits
- gather_off_nunits
;
2167 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
2169 for (i
= 0; i
< nunits
; ++i
)
2170 sel
[i
] = i
| gather_off_nunits
;
2171 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
2176 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2178 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2179 if (!is_gimple_min_invariant (ptr
))
2181 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2182 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2183 gcc_assert (!new_bb
);
2186 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2188 prev_stmt_info
= NULL
;
2189 for (j
= 0; j
< ncopies
; ++j
)
2191 if (modifier
== WIDEN
&& (j
& 1))
2192 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2193 perm_mask
, stmt
, gsi
);
2196 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2199 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2201 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2203 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2204 == TYPE_VECTOR_SUBPARTS (idxtype
));
2205 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2206 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2208 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2209 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2213 if (mask_perm_mask
&& (j
& 1))
2214 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2215 mask_perm_mask
, stmt
, gsi
);
2219 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2222 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2223 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2227 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2229 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2230 == TYPE_VECTOR_SUBPARTS (masktype
));
2231 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2232 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2234 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2235 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2241 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2244 if (!useless_type_conversion_p (vectype
, rettype
))
2246 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2247 == TYPE_VECTOR_SUBPARTS (rettype
));
2248 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2249 gimple_call_set_lhs (new_stmt
, op
);
2250 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2251 var
= make_ssa_name (vec_dest
);
2252 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2253 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2257 var
= make_ssa_name (vec_dest
, new_stmt
);
2258 gimple_call_set_lhs (new_stmt
, var
);
2261 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2263 if (modifier
== NARROW
)
2270 var
= permute_vec_elements (prev_res
, var
,
2271 perm_mask
, stmt
, gsi
);
2272 new_stmt
= SSA_NAME_DEF_STMT (var
);
2275 if (prev_stmt_info
== NULL
)
2276 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2278 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2279 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2282 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2284 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2286 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2287 stmt_info
= vinfo_for_stmt (stmt
);
2289 tree lhs
= gimple_call_lhs (stmt
);
2290 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2291 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2292 set_vinfo_for_stmt (stmt
, NULL
);
2293 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2294 gsi_replace (gsi
, new_stmt
, true);
2297 else if (vls_type
!= VLS_LOAD
)
2299 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2300 prev_stmt_info
= NULL
;
2301 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2302 for (i
= 0; i
< ncopies
; i
++)
2304 unsigned align
, misalign
;
2308 tree rhs
= gimple_call_arg (stmt
, 3);
2309 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2310 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2311 /* We should have catched mismatched types earlier. */
2312 gcc_assert (useless_type_conversion_p (vectype
,
2313 TREE_TYPE (vec_rhs
)));
2314 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2315 NULL_TREE
, &dummy
, gsi
,
2316 &ptr_incr
, false, &inv_p
);
2317 gcc_assert (!inv_p
);
2321 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2322 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2323 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2324 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2325 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2326 TYPE_SIZE_UNIT (vectype
));
2329 align
= TYPE_ALIGN_UNIT (vectype
);
2330 if (aligned_access_p (dr
))
2332 else if (DR_MISALIGNMENT (dr
) == -1)
2334 align
= TYPE_ALIGN_UNIT (elem_type
);
2338 misalign
= DR_MISALIGNMENT (dr
);
2339 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2341 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2342 misalign
? least_bit_hwi (misalign
) : align
);
2344 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2345 ptr
, vec_mask
, vec_rhs
);
2346 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2348 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2350 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2351 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2356 tree vec_mask
= NULL_TREE
;
2357 prev_stmt_info
= NULL
;
2358 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2359 for (i
= 0; i
< ncopies
; i
++)
2361 unsigned align
, misalign
;
2365 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2366 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2367 NULL_TREE
, &dummy
, gsi
,
2368 &ptr_incr
, false, &inv_p
);
2369 gcc_assert (!inv_p
);
2373 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2374 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2375 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2376 TYPE_SIZE_UNIT (vectype
));
2379 align
= TYPE_ALIGN_UNIT (vectype
);
2380 if (aligned_access_p (dr
))
2382 else if (DR_MISALIGNMENT (dr
) == -1)
2384 align
= TYPE_ALIGN_UNIT (elem_type
);
2388 misalign
= DR_MISALIGNMENT (dr
);
2389 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2391 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2392 misalign
? least_bit_hwi (misalign
) : align
);
2394 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2396 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2397 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2399 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2401 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2402 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2406 if (vls_type
== VLS_LOAD
)
2408 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2410 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2412 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2413 stmt_info
= vinfo_for_stmt (stmt
);
2415 tree lhs
= gimple_call_lhs (stmt
);
2416 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2417 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2418 set_vinfo_for_stmt (stmt
, NULL
);
2419 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2420 gsi_replace (gsi
, new_stmt
, true);
2426 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2427 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2428 in a single step. On success, store the binary pack code in
2432 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2433 tree_code
*convert_code
)
2435 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2436 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2440 int multi_step_cvt
= 0;
2441 auto_vec
<tree
, 8> interm_types
;
2442 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2443 &code
, &multi_step_cvt
,
2448 *convert_code
= code
;
2452 /* Function vectorizable_call.
2454 Check if GS performs a function call that can be vectorized.
2455 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2456 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2457 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2460 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2467 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2468 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2469 tree vectype_out
, vectype_in
;
2472 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2473 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2474 vec_info
*vinfo
= stmt_info
->vinfo
;
2475 tree fndecl
, new_temp
, rhs_type
;
2477 enum vect_def_type dt
[3]
2478 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2479 gimple
*new_stmt
= NULL
;
2481 vec
<tree
> vargs
= vNULL
;
2482 enum { NARROW
, NONE
, WIDEN
} modifier
;
2486 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2489 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2493 /* Is GS a vectorizable call? */
2494 stmt
= dyn_cast
<gcall
*> (gs
);
2498 if (gimple_call_internal_p (stmt
)
2499 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2500 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2501 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2504 if (gimple_call_lhs (stmt
) == NULL_TREE
2505 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2508 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2510 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2512 /* Process function arguments. */
2513 rhs_type
= NULL_TREE
;
2514 vectype_in
= NULL_TREE
;
2515 nargs
= gimple_call_num_args (stmt
);
2517 /* Bail out if the function has more than three arguments, we do not have
2518 interesting builtin functions to vectorize with more than two arguments
2519 except for fma. No arguments is also not good. */
2520 if (nargs
== 0 || nargs
> 3)
2523 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2524 if (gimple_call_internal_p (stmt
)
2525 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2528 rhs_type
= unsigned_type_node
;
2531 for (i
= 0; i
< nargs
; i
++)
2535 op
= gimple_call_arg (stmt
, i
);
2537 /* We can only handle calls with arguments of the same type. */
2539 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2541 if (dump_enabled_p ())
2542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2543 "argument types differ.\n");
2547 rhs_type
= TREE_TYPE (op
);
2549 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2553 "use not simple.\n");
2558 vectype_in
= opvectype
;
2560 && opvectype
!= vectype_in
)
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2564 "argument vector types differ.\n");
2568 /* If all arguments are external or constant defs use a vector type with
2569 the same size as the output vector type. */
2571 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2573 gcc_assert (vectype_in
);
2576 if (dump_enabled_p ())
2578 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2579 "no vectype for scalar type ");
2580 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2581 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2588 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2589 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2590 if (nunits_in
== nunits_out
/ 2)
2592 else if (nunits_out
== nunits_in
)
2594 else if (nunits_out
== nunits_in
/ 2)
2599 /* We only handle functions that do not read or clobber memory. */
2600 if (gimple_vuse (stmt
))
2602 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2604 "function reads from or writes to memory.\n");
2608 /* For now, we only vectorize functions if a target specific builtin
2609 is available. TODO -- in some cases, it might be profitable to
2610 insert the calls for pieces of the vector, in order to be able
2611 to vectorize other operations in the loop. */
2613 internal_fn ifn
= IFN_LAST
;
2614 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2615 tree callee
= gimple_call_fndecl (stmt
);
2617 /* First try using an internal function. */
2618 tree_code convert_code
= ERROR_MARK
;
2620 && (modifier
== NONE
2621 || (modifier
== NARROW
2622 && simple_integer_narrowing (vectype_out
, vectype_in
,
2624 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2627 /* If that fails, try asking for a target-specific built-in function. */
2628 if (ifn
== IFN_LAST
)
2630 if (cfn
!= CFN_LAST
)
2631 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2632 (cfn
, vectype_out
, vectype_in
);
2634 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2635 (callee
, vectype_out
, vectype_in
);
2638 if (ifn
== IFN_LAST
&& !fndecl
)
2640 if (cfn
== CFN_GOMP_SIMD_LANE
2643 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2644 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2645 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2646 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2648 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2649 { 0, 1, 2, ... vf - 1 } vector. */
2650 gcc_assert (nargs
== 0);
2654 if (dump_enabled_p ())
2655 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2656 "function is not vectorizable.\n");
2663 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2664 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2666 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2668 /* Sanity check: make sure that at least one copy of the vectorized stmt
2669 needs to be generated. */
2670 gcc_assert (ncopies
>= 1);
2672 if (!vec_stmt
) /* transformation not required. */
2674 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2675 if (dump_enabled_p ())
2676 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2678 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2679 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2680 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2681 vec_promote_demote
, stmt_info
, 0, vect_body
);
2688 if (dump_enabled_p ())
2689 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2692 scalar_dest
= gimple_call_lhs (stmt
);
2693 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2695 prev_stmt_info
= NULL
;
2696 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2698 tree prev_res
= NULL_TREE
;
2699 for (j
= 0; j
< ncopies
; ++j
)
2701 /* Build argument list for the vectorized call. */
2703 vargs
.create (nargs
);
2709 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2710 vec
<tree
> vec_oprnds0
;
2712 for (i
= 0; i
< nargs
; i
++)
2713 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2714 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2715 vec_oprnds0
= vec_defs
[0];
2717 /* Arguments are ready. Create the new vector stmt. */
2718 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2721 for (k
= 0; k
< nargs
; k
++)
2723 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2724 vargs
[k
] = vec_oprndsk
[i
];
2726 if (modifier
== NARROW
)
2728 tree half_res
= make_ssa_name (vectype_in
);
2729 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2730 gimple_call_set_lhs (new_stmt
, half_res
);
2731 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2734 prev_res
= half_res
;
2737 new_temp
= make_ssa_name (vec_dest
);
2738 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2739 prev_res
, half_res
);
2743 if (ifn
!= IFN_LAST
)
2744 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2746 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2747 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2748 gimple_call_set_lhs (new_stmt
, new_temp
);
2750 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2751 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2754 for (i
= 0; i
< nargs
; i
++)
2756 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2757 vec_oprndsi
.release ();
2762 for (i
= 0; i
< nargs
; i
++)
2764 op
= gimple_call_arg (stmt
, i
);
2767 = vect_get_vec_def_for_operand (op
, stmt
);
2770 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2772 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2775 vargs
.quick_push (vec_oprnd0
);
2778 if (gimple_call_internal_p (stmt
)
2779 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2781 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2783 for (k
= 0; k
< nunits_out
; ++k
)
2784 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2785 tree cst
= build_vector (vectype_out
, v
);
2787 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2788 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2789 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2790 new_temp
= make_ssa_name (vec_dest
);
2791 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2793 else if (modifier
== NARROW
)
2795 tree half_res
= make_ssa_name (vectype_in
);
2796 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2797 gimple_call_set_lhs (new_stmt
, half_res
);
2798 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2801 prev_res
= half_res
;
2804 new_temp
= make_ssa_name (vec_dest
);
2805 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2806 prev_res
, half_res
);
2810 if (ifn
!= IFN_LAST
)
2811 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2813 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2814 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2815 gimple_call_set_lhs (new_stmt
, new_temp
);
2817 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2819 if (j
== (modifier
== NARROW
? 1 : 0))
2820 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2822 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2824 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2827 else if (modifier
== NARROW
)
2829 for (j
= 0; j
< ncopies
; ++j
)
2831 /* Build argument list for the vectorized call. */
2833 vargs
.create (nargs
* 2);
2839 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2840 vec
<tree
> vec_oprnds0
;
2842 for (i
= 0; i
< nargs
; i
++)
2843 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2844 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2845 vec_oprnds0
= vec_defs
[0];
2847 /* Arguments are ready. Create the new vector stmt. */
2848 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2852 for (k
= 0; k
< nargs
; k
++)
2854 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2855 vargs
.quick_push (vec_oprndsk
[i
]);
2856 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2858 if (ifn
!= IFN_LAST
)
2859 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2861 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2862 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2863 gimple_call_set_lhs (new_stmt
, new_temp
);
2864 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2865 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2868 for (i
= 0; i
< nargs
; i
++)
2870 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2871 vec_oprndsi
.release ();
2876 for (i
= 0; i
< nargs
; i
++)
2878 op
= gimple_call_arg (stmt
, i
);
2882 = vect_get_vec_def_for_operand (op
, stmt
);
2884 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2888 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2890 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2892 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2895 vargs
.quick_push (vec_oprnd0
);
2896 vargs
.quick_push (vec_oprnd1
);
2899 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2900 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2901 gimple_call_set_lhs (new_stmt
, new_temp
);
2902 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2905 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2907 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2909 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2912 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2915 /* No current target implements this case. */
2920 /* The call in STMT might prevent it from being removed in dce.
2921 We however cannot remove it here, due to the way the ssa name
2922 it defines is mapped to the new definition. So just replace
2923 rhs of the statement with something harmless. */
2928 type
= TREE_TYPE (scalar_dest
);
2929 if (is_pattern_stmt_p (stmt_info
))
2930 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2932 lhs
= gimple_call_lhs (stmt
);
2934 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2935 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2936 set_vinfo_for_stmt (stmt
, NULL
);
2937 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2938 gsi_replace (gsi
, new_stmt
, false);
2944 struct simd_call_arg_info
2948 enum vect_def_type dt
;
2949 HOST_WIDE_INT linear_step
;
2951 bool simd_lane_linear
;
2954 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2955 is linear within simd lane (but not within whole loop), note it in
2959 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2960 struct simd_call_arg_info
*arginfo
)
2962 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2964 if (!is_gimple_assign (def_stmt
)
2965 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2966 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2969 tree base
= gimple_assign_rhs1 (def_stmt
);
2970 HOST_WIDE_INT linear_step
= 0;
2971 tree v
= gimple_assign_rhs2 (def_stmt
);
2972 while (TREE_CODE (v
) == SSA_NAME
)
2975 def_stmt
= SSA_NAME_DEF_STMT (v
);
2976 if (is_gimple_assign (def_stmt
))
2977 switch (gimple_assign_rhs_code (def_stmt
))
2980 t
= gimple_assign_rhs2 (def_stmt
);
2981 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2983 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2984 v
= gimple_assign_rhs1 (def_stmt
);
2987 t
= gimple_assign_rhs2 (def_stmt
);
2988 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2990 linear_step
= tree_to_shwi (t
);
2991 v
= gimple_assign_rhs1 (def_stmt
);
2994 t
= gimple_assign_rhs1 (def_stmt
);
2995 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2996 || (TYPE_PRECISION (TREE_TYPE (v
))
2997 < TYPE_PRECISION (TREE_TYPE (t
))))
3006 else if (is_gimple_call (def_stmt
)
3007 && gimple_call_internal_p (def_stmt
)
3008 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
3010 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3011 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3016 arginfo
->linear_step
= linear_step
;
3018 arginfo
->simd_lane_linear
= true;
3024 /* Function vectorizable_simd_clone_call.
3026 Check if STMT performs a function call that can be vectorized
3027 by calling a simd clone of the function.
3028 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3029 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3030 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3033 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3034 gimple
**vec_stmt
, slp_tree slp_node
)
3039 tree vec_oprnd0
= NULL_TREE
;
3040 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3042 unsigned int nunits
;
3043 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3044 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3045 vec_info
*vinfo
= stmt_info
->vinfo
;
3046 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3047 tree fndecl
, new_temp
;
3049 gimple
*new_stmt
= NULL
;
3051 auto_vec
<simd_call_arg_info
> arginfo
;
3052 vec
<tree
> vargs
= vNULL
;
3054 tree lhs
, rtype
, ratype
;
3055 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
3057 /* Is STMT a vectorizable call? */
3058 if (!is_gimple_call (stmt
))
3061 fndecl
= gimple_call_fndecl (stmt
);
3062 if (fndecl
== NULL_TREE
)
3065 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3066 if (node
== NULL
|| node
->simd_clones
== NULL
)
3069 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3072 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3076 if (gimple_call_lhs (stmt
)
3077 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3080 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3082 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3084 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3091 /* Process function arguments. */
3092 nargs
= gimple_call_num_args (stmt
);
3094 /* Bail out if the function has zero arguments. */
3098 arginfo
.reserve (nargs
, true);
3100 for (i
= 0; i
< nargs
; i
++)
3102 simd_call_arg_info thisarginfo
;
3105 thisarginfo
.linear_step
= 0;
3106 thisarginfo
.align
= 0;
3107 thisarginfo
.op
= NULL_TREE
;
3108 thisarginfo
.simd_lane_linear
= false;
3110 op
= gimple_call_arg (stmt
, i
);
3111 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3112 &thisarginfo
.vectype
)
3113 || thisarginfo
.dt
== vect_uninitialized_def
)
3115 if (dump_enabled_p ())
3116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3117 "use not simple.\n");
3121 if (thisarginfo
.dt
== vect_constant_def
3122 || thisarginfo
.dt
== vect_external_def
)
3123 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3125 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3127 /* For linear arguments, the analyze phase should have saved
3128 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3129 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3130 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3132 gcc_assert (vec_stmt
);
3133 thisarginfo
.linear_step
3134 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3136 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3137 thisarginfo
.simd_lane_linear
3138 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3139 == boolean_true_node
);
3140 /* If loop has been peeled for alignment, we need to adjust it. */
3141 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3142 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3143 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3145 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3146 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3147 tree opt
= TREE_TYPE (thisarginfo
.op
);
3148 bias
= fold_convert (TREE_TYPE (step
), bias
);
3149 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3151 = fold_build2 (POINTER_TYPE_P (opt
)
3152 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3153 thisarginfo
.op
, bias
);
3157 && thisarginfo
.dt
!= vect_constant_def
3158 && thisarginfo
.dt
!= vect_external_def
3160 && TREE_CODE (op
) == SSA_NAME
3161 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3163 && tree_fits_shwi_p (iv
.step
))
3165 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3166 thisarginfo
.op
= iv
.base
;
3168 else if ((thisarginfo
.dt
== vect_constant_def
3169 || thisarginfo
.dt
== vect_external_def
)
3170 && POINTER_TYPE_P (TREE_TYPE (op
)))
3171 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3172 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3174 if (POINTER_TYPE_P (TREE_TYPE (op
))
3175 && !thisarginfo
.linear_step
3177 && thisarginfo
.dt
!= vect_constant_def
3178 && thisarginfo
.dt
!= vect_external_def
3181 && TREE_CODE (op
) == SSA_NAME
)
3182 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3184 arginfo
.quick_push (thisarginfo
);
3187 unsigned int badness
= 0;
3188 struct cgraph_node
*bestn
= NULL
;
3189 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3190 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3192 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3193 n
= n
->simdclone
->next_clone
)
3195 unsigned int this_badness
= 0;
3196 if (n
->simdclone
->simdlen
3197 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3198 || n
->simdclone
->nargs
!= nargs
)
3200 if (n
->simdclone
->simdlen
3201 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3202 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3203 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3204 if (n
->simdclone
->inbranch
)
3205 this_badness
+= 2048;
3206 int target_badness
= targetm
.simd_clone
.usable (n
);
3207 if (target_badness
< 0)
3209 this_badness
+= target_badness
* 512;
3210 /* FORNOW: Have to add code to add the mask argument. */
3211 if (n
->simdclone
->inbranch
)
3213 for (i
= 0; i
< nargs
; i
++)
3215 switch (n
->simdclone
->args
[i
].arg_type
)
3217 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3218 if (!useless_type_conversion_p
3219 (n
->simdclone
->args
[i
].orig_type
,
3220 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3222 else if (arginfo
[i
].dt
== vect_constant_def
3223 || arginfo
[i
].dt
== vect_external_def
3224 || arginfo
[i
].linear_step
)
3227 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3228 if (arginfo
[i
].dt
!= vect_constant_def
3229 && arginfo
[i
].dt
!= vect_external_def
)
3232 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3234 if (arginfo
[i
].dt
== vect_constant_def
3235 || arginfo
[i
].dt
== vect_external_def
3236 || (arginfo
[i
].linear_step
3237 != n
->simdclone
->args
[i
].linear_step
))
3240 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3241 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3242 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3243 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3244 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3245 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3249 case SIMD_CLONE_ARG_TYPE_MASK
:
3252 if (i
== (size_t) -1)
3254 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3259 if (arginfo
[i
].align
)
3260 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3261 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3263 if (i
== (size_t) -1)
3265 if (bestn
== NULL
|| this_badness
< badness
)
3268 badness
= this_badness
;
3275 for (i
= 0; i
< nargs
; i
++)
3276 if ((arginfo
[i
].dt
== vect_constant_def
3277 || arginfo
[i
].dt
== vect_external_def
)
3278 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3281 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3283 if (arginfo
[i
].vectype
== NULL
3284 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3285 > bestn
->simdclone
->simdlen
))
3289 fndecl
= bestn
->decl
;
3290 nunits
= bestn
->simdclone
->simdlen
;
3291 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3293 /* If the function isn't const, only allow it in simd loops where user
3294 has asserted that at least nunits consecutive iterations can be
3295 performed using SIMD instructions. */
3296 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3297 && gimple_vuse (stmt
))
3300 /* Sanity check: make sure that at least one copy of the vectorized stmt
3301 needs to be generated. */
3302 gcc_assert (ncopies
>= 1);
3304 if (!vec_stmt
) /* transformation not required. */
3306 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3307 for (i
= 0; i
< nargs
; i
++)
3308 if ((bestn
->simdclone
->args
[i
].arg_type
3309 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3310 || (bestn
->simdclone
->args
[i
].arg_type
3311 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3313 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3315 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3316 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3317 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3318 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3319 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3320 tree sll
= arginfo
[i
].simd_lane_linear
3321 ? boolean_true_node
: boolean_false_node
;
3322 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3324 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3325 if (dump_enabled_p ())
3326 dump_printf_loc (MSG_NOTE
, vect_location
,
3327 "=== vectorizable_simd_clone_call ===\n");
3328 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3334 if (dump_enabled_p ())
3335 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3338 scalar_dest
= gimple_call_lhs (stmt
);
3339 vec_dest
= NULL_TREE
;
3344 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3345 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3346 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3349 rtype
= TREE_TYPE (ratype
);
3353 prev_stmt_info
= NULL
;
3354 for (j
= 0; j
< ncopies
; ++j
)
3356 /* Build argument list for the vectorized call. */
3358 vargs
.create (nargs
);
3362 for (i
= 0; i
< nargs
; i
++)
3364 unsigned int k
, l
, m
, o
;
3366 op
= gimple_call_arg (stmt
, i
);
3367 switch (bestn
->simdclone
->args
[i
].arg_type
)
3369 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3370 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3371 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3372 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3374 if (TYPE_VECTOR_SUBPARTS (atype
)
3375 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3377 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3378 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3379 / TYPE_VECTOR_SUBPARTS (atype
));
3380 gcc_assert ((k
& (k
- 1)) == 0);
3383 = vect_get_vec_def_for_operand (op
, stmt
);
3386 vec_oprnd0
= arginfo
[i
].op
;
3387 if ((m
& (k
- 1)) == 0)
3389 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3392 arginfo
[i
].op
= vec_oprnd0
;
3394 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3396 bitsize_int ((m
& (k
- 1)) * prec
));
3398 = gimple_build_assign (make_ssa_name (atype
),
3400 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3401 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3405 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3406 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3407 gcc_assert ((k
& (k
- 1)) == 0);
3408 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3410 vec_alloc (ctor_elts
, k
);
3413 for (l
= 0; l
< k
; l
++)
3415 if (m
== 0 && l
== 0)
3417 = vect_get_vec_def_for_operand (op
, stmt
);
3420 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3422 arginfo
[i
].op
= vec_oprnd0
;
3425 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3429 vargs
.safe_push (vec_oprnd0
);
3432 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3434 = gimple_build_assign (make_ssa_name (atype
),
3436 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3437 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3442 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3443 vargs
.safe_push (op
);
3445 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3446 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3451 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3456 edge pe
= loop_preheader_edge (loop
);
3457 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3458 gcc_assert (!new_bb
);
3460 if (arginfo
[i
].simd_lane_linear
)
3462 vargs
.safe_push (arginfo
[i
].op
);
3465 tree phi_res
= copy_ssa_name (op
);
3466 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3467 set_vinfo_for_stmt (new_phi
,
3468 new_stmt_vec_info (new_phi
, loop_vinfo
));
3469 add_phi_arg (new_phi
, arginfo
[i
].op
,
3470 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3472 = POINTER_TYPE_P (TREE_TYPE (op
))
3473 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3474 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3475 ? sizetype
: TREE_TYPE (op
);
3477 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3479 tree tcst
= wide_int_to_tree (type
, cst
);
3480 tree phi_arg
= copy_ssa_name (op
);
3482 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3483 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3484 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3485 set_vinfo_for_stmt (new_stmt
,
3486 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3487 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3489 arginfo
[i
].op
= phi_res
;
3490 vargs
.safe_push (phi_res
);
3495 = POINTER_TYPE_P (TREE_TYPE (op
))
3496 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3497 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3498 ? sizetype
: TREE_TYPE (op
);
3500 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3502 tree tcst
= wide_int_to_tree (type
, cst
);
3503 new_temp
= make_ssa_name (TREE_TYPE (op
));
3504 new_stmt
= gimple_build_assign (new_temp
, code
,
3505 arginfo
[i
].op
, tcst
);
3506 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3507 vargs
.safe_push (new_temp
);
3510 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3511 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3512 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3513 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3514 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3515 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3521 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3524 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3526 new_temp
= create_tmp_var (ratype
);
3527 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3528 == TYPE_VECTOR_SUBPARTS (rtype
))
3529 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3531 new_temp
= make_ssa_name (rtype
, new_stmt
);
3532 gimple_call_set_lhs (new_stmt
, new_temp
);
3534 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3538 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3541 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3542 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3543 gcc_assert ((k
& (k
- 1)) == 0);
3544 for (l
= 0; l
< k
; l
++)
3549 t
= build_fold_addr_expr (new_temp
);
3550 t
= build2 (MEM_REF
, vectype
, t
,
3551 build_int_cst (TREE_TYPE (t
),
3552 l
* prec
/ BITS_PER_UNIT
));
3555 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3556 size_int (prec
), bitsize_int (l
* prec
));
3558 = gimple_build_assign (make_ssa_name (vectype
), t
);
3559 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3560 if (j
== 0 && l
== 0)
3561 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3563 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3565 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3570 tree clobber
= build_constructor (ratype
, NULL
);
3571 TREE_THIS_VOLATILE (clobber
) = 1;
3572 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3573 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3577 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3579 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3580 / TYPE_VECTOR_SUBPARTS (rtype
));
3581 gcc_assert ((k
& (k
- 1)) == 0);
3582 if ((j
& (k
- 1)) == 0)
3583 vec_alloc (ret_ctor_elts
, k
);
3586 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3587 for (m
= 0; m
< o
; m
++)
3589 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3590 size_int (m
), NULL_TREE
, NULL_TREE
);
3592 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3593 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3594 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3595 gimple_assign_lhs (new_stmt
));
3597 tree clobber
= build_constructor (ratype
, NULL
);
3598 TREE_THIS_VOLATILE (clobber
) = 1;
3599 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3600 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3603 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3604 if ((j
& (k
- 1)) != k
- 1)
3606 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3608 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3609 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3611 if ((unsigned) j
== k
- 1)
3612 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3614 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3616 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3621 tree t
= build_fold_addr_expr (new_temp
);
3622 t
= build2 (MEM_REF
, vectype
, t
,
3623 build_int_cst (TREE_TYPE (t
), 0));
3625 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3626 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3627 tree clobber
= build_constructor (ratype
, NULL
);
3628 TREE_THIS_VOLATILE (clobber
) = 1;
3629 vect_finish_stmt_generation (stmt
,
3630 gimple_build_assign (new_temp
,
3636 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3638 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3640 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3645 /* The call in STMT might prevent it from being removed in dce.
3646 We however cannot remove it here, due to the way the ssa name
3647 it defines is mapped to the new definition. So just replace
3648 rhs of the statement with something harmless. */
3655 type
= TREE_TYPE (scalar_dest
);
3656 if (is_pattern_stmt_p (stmt_info
))
3657 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3659 lhs
= gimple_call_lhs (stmt
);
3660 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3663 new_stmt
= gimple_build_nop ();
3664 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3665 set_vinfo_for_stmt (stmt
, NULL
);
3666 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3667 gsi_replace (gsi
, new_stmt
, true);
3668 unlink_stmt_vdef (stmt
);
3674 /* Function vect_gen_widened_results_half
3676 Create a vector stmt whose code, type, number of arguments, and result
3677 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3678 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3679 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3680 needs to be created (DECL is a function-decl of a target-builtin).
3681 STMT is the original scalar stmt that we are vectorizing. */
3684 vect_gen_widened_results_half (enum tree_code code
,
3686 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3687 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3693 /* Generate half of the widened result: */
3694 if (code
== CALL_EXPR
)
3696 /* Target specific support */
3697 if (op_type
== binary_op
)
3698 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3700 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3701 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3702 gimple_call_set_lhs (new_stmt
, new_temp
);
3706 /* Generic support */
3707 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3708 if (op_type
!= binary_op
)
3710 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3711 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3712 gimple_assign_set_lhs (new_stmt
, new_temp
);
3714 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3720 /* Get vectorized definitions for loop-based vectorization. For the first
3721 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3722 scalar operand), and for the rest we get a copy with
3723 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3724 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3725 The vectors are collected into VEC_OPRNDS. */
3728 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3729 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3733 /* Get first vector operand. */
3734 /* All the vector operands except the very first one (that is scalar oprnd)
3736 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3737 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3739 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3741 vec_oprnds
->quick_push (vec_oprnd
);
3743 /* Get second vector operand. */
3744 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3745 vec_oprnds
->quick_push (vec_oprnd
);
3749 /* For conversion in multiple steps, continue to get operands
3752 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3756 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3757 For multi-step conversions store the resulting vectors and call the function
3761 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3762 int multi_step_cvt
, gimple
*stmt
,
3764 gimple_stmt_iterator
*gsi
,
3765 slp_tree slp_node
, enum tree_code code
,
3766 stmt_vec_info
*prev_stmt_info
)
3769 tree vop0
, vop1
, new_tmp
, vec_dest
;
3771 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3773 vec_dest
= vec_dsts
.pop ();
3775 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3777 /* Create demotion operation. */
3778 vop0
= (*vec_oprnds
)[i
];
3779 vop1
= (*vec_oprnds
)[i
+ 1];
3780 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3781 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3782 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3783 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3786 /* Store the resulting vector for next recursive call. */
3787 (*vec_oprnds
)[i
/2] = new_tmp
;
3790 /* This is the last step of the conversion sequence. Store the
3791 vectors in SLP_NODE or in vector info of the scalar statement
3792 (or in STMT_VINFO_RELATED_STMT chain). */
3794 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3797 if (!*prev_stmt_info
)
3798 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3800 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3802 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3807 /* For multi-step demotion operations we first generate demotion operations
3808 from the source type to the intermediate types, and then combine the
3809 results (stored in VEC_OPRNDS) in demotion operation to the destination
3813 /* At each level of recursion we have half of the operands we had at the
3815 vec_oprnds
->truncate ((i
+1)/2);
3816 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3817 stmt
, vec_dsts
, gsi
, slp_node
,
3818 VEC_PACK_TRUNC_EXPR
,
3822 vec_dsts
.quick_push (vec_dest
);
3826 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3827 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3828 the resulting vectors and call the function recursively. */
3831 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3832 vec
<tree
> *vec_oprnds1
,
3833 gimple
*stmt
, tree vec_dest
,
3834 gimple_stmt_iterator
*gsi
,
3835 enum tree_code code1
,
3836 enum tree_code code2
, tree decl1
,
3837 tree decl2
, int op_type
)
3840 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3841 gimple
*new_stmt1
, *new_stmt2
;
3842 vec
<tree
> vec_tmp
= vNULL
;
3844 vec_tmp
.create (vec_oprnds0
->length () * 2);
3845 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3847 if (op_type
== binary_op
)
3848 vop1
= (*vec_oprnds1
)[i
];
3852 /* Generate the two halves of promotion operation. */
3853 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3854 op_type
, vec_dest
, gsi
, stmt
);
3855 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3856 op_type
, vec_dest
, gsi
, stmt
);
3857 if (is_gimple_call (new_stmt1
))
3859 new_tmp1
= gimple_call_lhs (new_stmt1
);
3860 new_tmp2
= gimple_call_lhs (new_stmt2
);
3864 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3865 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3868 /* Store the results for the next step. */
3869 vec_tmp
.quick_push (new_tmp1
);
3870 vec_tmp
.quick_push (new_tmp2
);
3873 vec_oprnds0
->release ();
3874 *vec_oprnds0
= vec_tmp
;
3878 /* Check if STMT performs a conversion operation, that can be vectorized.
3879 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3880 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3881 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3884 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3885 gimple
**vec_stmt
, slp_tree slp_node
)
3889 tree op0
, op1
= NULL_TREE
;
3890 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3891 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3892 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3893 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3894 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3895 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3898 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3899 gimple
*new_stmt
= NULL
;
3900 stmt_vec_info prev_stmt_info
;
3903 tree vectype_out
, vectype_in
;
3905 tree lhs_type
, rhs_type
;
3906 enum { NARROW
, NONE
, WIDEN
} modifier
;
3907 vec
<tree
> vec_oprnds0
= vNULL
;
3908 vec
<tree
> vec_oprnds1
= vNULL
;
3910 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3911 vec_info
*vinfo
= stmt_info
->vinfo
;
3912 int multi_step_cvt
= 0;
3913 vec
<tree
> interm_types
= vNULL
;
3914 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3916 machine_mode rhs_mode
;
3917 unsigned short fltsz
;
3919 /* Is STMT a vectorizable conversion? */
3921 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3924 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3928 if (!is_gimple_assign (stmt
))
3931 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3934 code
= gimple_assign_rhs_code (stmt
);
3935 if (!CONVERT_EXPR_CODE_P (code
)
3936 && code
!= FIX_TRUNC_EXPR
3937 && code
!= FLOAT_EXPR
3938 && code
!= WIDEN_MULT_EXPR
3939 && code
!= WIDEN_LSHIFT_EXPR
)
3942 op_type
= TREE_CODE_LENGTH (code
);
3944 /* Check types of lhs and rhs. */
3945 scalar_dest
= gimple_assign_lhs (stmt
);
3946 lhs_type
= TREE_TYPE (scalar_dest
);
3947 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3949 op0
= gimple_assign_rhs1 (stmt
);
3950 rhs_type
= TREE_TYPE (op0
);
3952 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3953 && !((INTEGRAL_TYPE_P (lhs_type
)
3954 && INTEGRAL_TYPE_P (rhs_type
))
3955 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3956 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3959 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3960 && ((INTEGRAL_TYPE_P (lhs_type
)
3961 && (TYPE_PRECISION (lhs_type
)
3962 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3963 || (INTEGRAL_TYPE_P (rhs_type
)
3964 && (TYPE_PRECISION (rhs_type
)
3965 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3967 if (dump_enabled_p ())
3968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3969 "type conversion to/from bit-precision unsupported."
3974 /* Check the operands of the operation. */
3975 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3979 "use not simple.\n");
3982 if (op_type
== binary_op
)
3986 op1
= gimple_assign_rhs2 (stmt
);
3987 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3988 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3990 if (CONSTANT_CLASS_P (op0
))
3991 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
3993 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3997 if (dump_enabled_p ())
3998 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3999 "use not simple.\n");
4004 /* If op0 is an external or constant defs use a vector type of
4005 the same size as the output vector type. */
4007 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4009 gcc_assert (vectype_in
);
4012 if (dump_enabled_p ())
4014 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4015 "no vectype for scalar type ");
4016 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4017 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4023 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4024 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4026 if (dump_enabled_p ())
4028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4029 "can't convert between boolean and non "
4031 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4032 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4038 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4039 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4040 if (nunits_in
< nunits_out
)
4042 else if (nunits_out
== nunits_in
)
4047 /* Multiple types in SLP are handled by creating the appropriate number of
4048 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4052 else if (modifier
== NARROW
)
4053 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
4055 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4057 /* Sanity check: make sure that at least one copy of the vectorized stmt
4058 needs to be generated. */
4059 gcc_assert (ncopies
>= 1);
4061 /* Supportable by target? */
4065 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4067 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4072 if (dump_enabled_p ())
4073 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4074 "conversion not supported by target.\n");
4078 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4079 &code1
, &code2
, &multi_step_cvt
,
4082 /* Binary widening operation can only be supported directly by the
4084 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4088 if (code
!= FLOAT_EXPR
4089 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4090 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4093 rhs_mode
= TYPE_MODE (rhs_type
);
4094 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
4095 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
4096 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
4097 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
4100 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4101 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4102 if (cvt_type
== NULL_TREE
)
4105 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4107 if (!supportable_convert_operation (code
, vectype_out
,
4108 cvt_type
, &decl1
, &codecvt1
))
4111 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4112 cvt_type
, &codecvt1
,
4113 &codecvt2
, &multi_step_cvt
,
4117 gcc_assert (multi_step_cvt
== 0);
4119 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4120 vectype_in
, &code1
, &code2
,
4121 &multi_step_cvt
, &interm_types
))
4125 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
4128 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4129 codecvt2
= ERROR_MARK
;
4133 interm_types
.safe_push (cvt_type
);
4134 cvt_type
= NULL_TREE
;
4139 gcc_assert (op_type
== unary_op
);
4140 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4141 &code1
, &multi_step_cvt
,
4145 if (code
!= FIX_TRUNC_EXPR
4146 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
4147 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
4150 rhs_mode
= TYPE_MODE (rhs_type
);
4152 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4153 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4154 if (cvt_type
== NULL_TREE
)
4156 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4159 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4160 &code1
, &multi_step_cvt
,
4169 if (!vec_stmt
) /* transformation not required. */
4171 if (dump_enabled_p ())
4172 dump_printf_loc (MSG_NOTE
, vect_location
,
4173 "=== vectorizable_conversion ===\n");
4174 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4176 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4177 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4179 else if (modifier
== NARROW
)
4181 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4182 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4186 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4187 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4189 interm_types
.release ();
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_NOTE
, vect_location
,
4196 "transform conversion. ncopies = %d.\n", ncopies
);
4198 if (op_type
== binary_op
)
4200 if (CONSTANT_CLASS_P (op0
))
4201 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4202 else if (CONSTANT_CLASS_P (op1
))
4203 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4206 /* In case of multi-step conversion, we first generate conversion operations
4207 to the intermediate types, and then from that types to the final one.
4208 We create vector destinations for the intermediate type (TYPES) received
4209 from supportable_*_operation, and store them in the correct order
4210 for future use in vect_create_vectorized_*_stmts (). */
4211 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4212 vec_dest
= vect_create_destination_var (scalar_dest
,
4213 (cvt_type
&& modifier
== WIDEN
)
4214 ? cvt_type
: vectype_out
);
4215 vec_dsts
.quick_push (vec_dest
);
4219 for (i
= interm_types
.length () - 1;
4220 interm_types
.iterate (i
, &intermediate_type
); i
--)
4222 vec_dest
= vect_create_destination_var (scalar_dest
,
4224 vec_dsts
.quick_push (vec_dest
);
4229 vec_dest
= vect_create_destination_var (scalar_dest
,
4231 ? vectype_out
: cvt_type
);
4235 if (modifier
== WIDEN
)
4237 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4238 if (op_type
== binary_op
)
4239 vec_oprnds1
.create (1);
4241 else if (modifier
== NARROW
)
4242 vec_oprnds0
.create (
4243 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4245 else if (code
== WIDEN_LSHIFT_EXPR
)
4246 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4249 prev_stmt_info
= NULL
;
4253 for (j
= 0; j
< ncopies
; j
++)
4256 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
4259 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4261 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4263 /* Arguments are ready, create the new vector stmt. */
4264 if (code1
== CALL_EXPR
)
4266 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4267 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4268 gimple_call_set_lhs (new_stmt
, new_temp
);
4272 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4273 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4274 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4275 gimple_assign_set_lhs (new_stmt
, new_temp
);
4278 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4280 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4283 if (!prev_stmt_info
)
4284 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4286 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4287 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4294 /* In case the vectorization factor (VF) is bigger than the number
4295 of elements that we can fit in a vectype (nunits), we have to
4296 generate more than one vector stmt - i.e - we need to "unroll"
4297 the vector stmt by a factor VF/nunits. */
4298 for (j
= 0; j
< ncopies
; j
++)
4305 if (code
== WIDEN_LSHIFT_EXPR
)
4310 /* Store vec_oprnd1 for every vector stmt to be created
4311 for SLP_NODE. We check during the analysis that all
4312 the shift arguments are the same. */
4313 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4314 vec_oprnds1
.quick_push (vec_oprnd1
);
4316 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4320 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4321 &vec_oprnds1
, slp_node
, -1);
4325 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4326 vec_oprnds0
.quick_push (vec_oprnd0
);
4327 if (op_type
== binary_op
)
4329 if (code
== WIDEN_LSHIFT_EXPR
)
4332 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4333 vec_oprnds1
.quick_push (vec_oprnd1
);
4339 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4340 vec_oprnds0
.truncate (0);
4341 vec_oprnds0
.quick_push (vec_oprnd0
);
4342 if (op_type
== binary_op
)
4344 if (code
== WIDEN_LSHIFT_EXPR
)
4347 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4349 vec_oprnds1
.truncate (0);
4350 vec_oprnds1
.quick_push (vec_oprnd1
);
4354 /* Arguments are ready. Create the new vector stmts. */
4355 for (i
= multi_step_cvt
; i
>= 0; i
--)
4357 tree this_dest
= vec_dsts
[i
];
4358 enum tree_code c1
= code1
, c2
= code2
;
4359 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4364 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4366 stmt
, this_dest
, gsi
,
4367 c1
, c2
, decl1
, decl2
,
4371 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4375 if (codecvt1
== CALL_EXPR
)
4377 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4378 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4379 gimple_call_set_lhs (new_stmt
, new_temp
);
4383 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4384 new_temp
= make_ssa_name (vec_dest
);
4385 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4389 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4392 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4395 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4398 if (!prev_stmt_info
)
4399 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4401 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4402 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4407 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4411 /* In case the vectorization factor (VF) is bigger than the number
4412 of elements that we can fit in a vectype (nunits), we have to
4413 generate more than one vector stmt - i.e - we need to "unroll"
4414 the vector stmt by a factor VF/nunits. */
4415 for (j
= 0; j
< ncopies
; j
++)
4419 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4423 vec_oprnds0
.truncate (0);
4424 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4425 vect_pow2 (multi_step_cvt
) - 1);
4428 /* Arguments are ready. Create the new vector stmts. */
4430 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4432 if (codecvt1
== CALL_EXPR
)
4434 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4435 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4436 gimple_call_set_lhs (new_stmt
, new_temp
);
4440 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4441 new_temp
= make_ssa_name (vec_dest
);
4442 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4446 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4447 vec_oprnds0
[i
] = new_temp
;
4450 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4451 stmt
, vec_dsts
, gsi
,
4456 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4460 vec_oprnds0
.release ();
4461 vec_oprnds1
.release ();
4462 interm_types
.release ();
4468 /* Function vectorizable_assignment.
4470 Check if STMT performs an assignment (copy) that can be vectorized.
4471 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4472 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4473 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4476 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4477 gimple
**vec_stmt
, slp_tree slp_node
)
4482 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4483 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4486 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4489 vec
<tree
> vec_oprnds
= vNULL
;
4491 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4492 vec_info
*vinfo
= stmt_info
->vinfo
;
4493 gimple
*new_stmt
= NULL
;
4494 stmt_vec_info prev_stmt_info
= NULL
;
4495 enum tree_code code
;
4498 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4501 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4505 /* Is vectorizable assignment? */
4506 if (!is_gimple_assign (stmt
))
4509 scalar_dest
= gimple_assign_lhs (stmt
);
4510 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4513 code
= gimple_assign_rhs_code (stmt
);
4514 if (gimple_assign_single_p (stmt
)
4515 || code
== PAREN_EXPR
4516 || CONVERT_EXPR_CODE_P (code
))
4517 op
= gimple_assign_rhs1 (stmt
);
4521 if (code
== VIEW_CONVERT_EXPR
)
4522 op
= TREE_OPERAND (op
, 0);
4524 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4525 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4527 /* Multiple types in SLP are handled by creating the appropriate number of
4528 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4533 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4535 gcc_assert (ncopies
>= 1);
4537 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4539 if (dump_enabled_p ())
4540 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4541 "use not simple.\n");
4545 /* We can handle NOP_EXPR conversions that do not change the number
4546 of elements or the vector size. */
4547 if ((CONVERT_EXPR_CODE_P (code
)
4548 || code
== VIEW_CONVERT_EXPR
)
4550 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4551 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4552 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4555 /* We do not handle bit-precision changes. */
4556 if ((CONVERT_EXPR_CODE_P (code
)
4557 || code
== VIEW_CONVERT_EXPR
)
4558 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4559 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4560 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4561 || ((TYPE_PRECISION (TREE_TYPE (op
))
4562 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4563 /* But a conversion that does not change the bit-pattern is ok. */
4564 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4565 > TYPE_PRECISION (TREE_TYPE (op
)))
4566 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4567 /* Conversion between boolean types of different sizes is
4568 a simple assignment in case their vectypes are same
4570 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4571 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4573 if (dump_enabled_p ())
4574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4575 "type conversion to/from bit-precision "
4580 if (!vec_stmt
) /* transformation not required. */
4582 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4583 if (dump_enabled_p ())
4584 dump_printf_loc (MSG_NOTE
, vect_location
,
4585 "=== vectorizable_assignment ===\n");
4586 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4591 if (dump_enabled_p ())
4592 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4595 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4598 for (j
= 0; j
< ncopies
; j
++)
4602 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4604 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4606 /* Arguments are ready. create the new vector stmt. */
4607 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4609 if (CONVERT_EXPR_CODE_P (code
)
4610 || code
== VIEW_CONVERT_EXPR
)
4611 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4612 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4613 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4614 gimple_assign_set_lhs (new_stmt
, new_temp
);
4615 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4617 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4624 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4626 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4628 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4631 vec_oprnds
.release ();
4636 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4637 either as shift by a scalar or by a vector. */
4640 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4643 machine_mode vec_mode
;
4648 vectype
= get_vectype_for_scalar_type (scalar_type
);
4652 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4654 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4656 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4658 || (optab_handler (optab
, TYPE_MODE (vectype
))
4659 == CODE_FOR_nothing
))
4663 vec_mode
= TYPE_MODE (vectype
);
4664 icode
= (int) optab_handler (optab
, vec_mode
);
4665 if (icode
== CODE_FOR_nothing
)
4672 /* Function vectorizable_shift.
4674 Check if STMT performs a shift operation that can be vectorized.
4675 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4676 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4677 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4680 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4681 gimple
**vec_stmt
, slp_tree slp_node
)
4685 tree op0
, op1
= NULL
;
4686 tree vec_oprnd1
= NULL_TREE
;
4687 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4689 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4690 enum tree_code code
;
4691 machine_mode vec_mode
;
4695 machine_mode optab_op2_mode
;
4697 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4698 gimple
*new_stmt
= NULL
;
4699 stmt_vec_info prev_stmt_info
;
4706 vec
<tree
> vec_oprnds0
= vNULL
;
4707 vec
<tree
> vec_oprnds1
= vNULL
;
4710 bool scalar_shift_arg
= true;
4711 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4712 vec_info
*vinfo
= stmt_info
->vinfo
;
4715 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4718 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4722 /* Is STMT a vectorizable binary/unary operation? */
4723 if (!is_gimple_assign (stmt
))
4726 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4729 code
= gimple_assign_rhs_code (stmt
);
4731 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4732 || code
== RROTATE_EXPR
))
4735 scalar_dest
= gimple_assign_lhs (stmt
);
4736 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4737 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4738 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4742 "bit-precision shifts not supported.\n");
4746 op0
= gimple_assign_rhs1 (stmt
);
4747 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4749 if (dump_enabled_p ())
4750 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4751 "use not simple.\n");
4754 /* If op0 is an external or constant def use a vector type with
4755 the same size as the output vector type. */
4757 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4759 gcc_assert (vectype
);
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4764 "no vectype for scalar type\n");
4768 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4769 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4770 if (nunits_out
!= nunits_in
)
4773 op1
= gimple_assign_rhs2 (stmt
);
4774 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4778 "use not simple.\n");
4783 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4787 /* Multiple types in SLP are handled by creating the appropriate number of
4788 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4793 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4795 gcc_assert (ncopies
>= 1);
4797 /* Determine whether the shift amount is a vector, or scalar. If the
4798 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4800 if ((dt
[1] == vect_internal_def
4801 || dt
[1] == vect_induction_def
)
4803 scalar_shift_arg
= false;
4804 else if (dt
[1] == vect_constant_def
4805 || dt
[1] == vect_external_def
4806 || dt
[1] == vect_internal_def
)
4808 /* In SLP, need to check whether the shift count is the same,
4809 in loops if it is a constant or invariant, it is always
4813 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4816 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4817 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4818 scalar_shift_arg
= false;
4821 /* If the shift amount is computed by a pattern stmt we cannot
4822 use the scalar amount directly thus give up and use a vector
4824 if (dt
[1] == vect_internal_def
)
4826 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4827 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4828 scalar_shift_arg
= false;
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4835 "operand mode requires invariant argument.\n");
4839 /* Vector shifted by vector. */
4840 if (!scalar_shift_arg
)
4842 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4843 if (dump_enabled_p ())
4844 dump_printf_loc (MSG_NOTE
, vect_location
,
4845 "vector/vector shift/rotate found.\n");
4848 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4849 if (op1_vectype
== NULL_TREE
4850 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4852 if (dump_enabled_p ())
4853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4854 "unusable type for last operand in"
4855 " vector/vector shift/rotate.\n");
4859 /* See if the machine has a vector shifted by scalar insn and if not
4860 then see if it has a vector shifted by vector insn. */
4863 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4865 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_NOTE
, vect_location
,
4869 "vector/scalar shift/rotate found.\n");
4873 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4875 && (optab_handler (optab
, TYPE_MODE (vectype
))
4876 != CODE_FOR_nothing
))
4878 scalar_shift_arg
= false;
4880 if (dump_enabled_p ())
4881 dump_printf_loc (MSG_NOTE
, vect_location
,
4882 "vector/vector shift/rotate found.\n");
4884 /* Unlike the other binary operators, shifts/rotates have
4885 the rhs being int, instead of the same type as the lhs,
4886 so make sure the scalar is the right type if we are
4887 dealing with vectors of long long/long/short/char. */
4888 if (dt
[1] == vect_constant_def
)
4889 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4890 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4894 && TYPE_MODE (TREE_TYPE (vectype
))
4895 != TYPE_MODE (TREE_TYPE (op1
)))
4897 if (dump_enabled_p ())
4898 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4899 "unusable type for last operand in"
4900 " vector/vector shift/rotate.\n");
4903 if (vec_stmt
&& !slp_node
)
4905 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4906 op1
= vect_init_vector (stmt
, op1
,
4907 TREE_TYPE (vectype
), NULL
);
4914 /* Supportable by target? */
4917 if (dump_enabled_p ())
4918 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4922 vec_mode
= TYPE_MODE (vectype
);
4923 icode
= (int) optab_handler (optab
, vec_mode
);
4924 if (icode
== CODE_FOR_nothing
)
4926 if (dump_enabled_p ())
4927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4928 "op not supported by target.\n");
4929 /* Check only during analysis. */
4930 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4931 || (vf
< vect_min_worthwhile_factor (code
)
4934 if (dump_enabled_p ())
4935 dump_printf_loc (MSG_NOTE
, vect_location
,
4936 "proceeding using word mode.\n");
4939 /* Worthwhile without SIMD support? Check only during analysis. */
4940 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4941 && vf
< vect_min_worthwhile_factor (code
)
4944 if (dump_enabled_p ())
4945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4946 "not worthwhile without SIMD support.\n");
4950 if (!vec_stmt
) /* transformation not required. */
4952 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4953 if (dump_enabled_p ())
4954 dump_printf_loc (MSG_NOTE
, vect_location
,
4955 "=== vectorizable_shift ===\n");
4956 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4962 if (dump_enabled_p ())
4963 dump_printf_loc (MSG_NOTE
, vect_location
,
4964 "transform binary/unary operation.\n");
4967 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4969 prev_stmt_info
= NULL
;
4970 for (j
= 0; j
< ncopies
; j
++)
4975 if (scalar_shift_arg
)
4977 /* Vector shl and shr insn patterns can be defined with scalar
4978 operand 2 (shift operand). In this case, use constant or loop
4979 invariant op1 directly, without extending it to vector mode
4981 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4982 if (!VECTOR_MODE_P (optab_op2_mode
))
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_NOTE
, vect_location
,
4986 "operand 1 using scalar mode.\n");
4988 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4989 vec_oprnds1
.quick_push (vec_oprnd1
);
4992 /* Store vec_oprnd1 for every vector stmt to be created
4993 for SLP_NODE. We check during the analysis that all
4994 the shift arguments are the same.
4995 TODO: Allow different constants for different vector
4996 stmts generated for an SLP instance. */
4997 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4998 vec_oprnds1
.quick_push (vec_oprnd1
);
5003 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5004 (a special case for certain kind of vector shifts); otherwise,
5005 operand 1 should be of a vector type (the usual case). */
5007 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5010 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5014 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5016 /* Arguments are ready. Create the new vector stmt. */
5017 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5019 vop1
= vec_oprnds1
[i
];
5020 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5021 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5022 gimple_assign_set_lhs (new_stmt
, new_temp
);
5023 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5025 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5032 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5034 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5035 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5038 vec_oprnds0
.release ();
5039 vec_oprnds1
.release ();
5045 /* Function vectorizable_operation.
5047 Check if STMT performs a binary, unary or ternary operation that can
5049 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5050 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5051 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5054 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5055 gimple
**vec_stmt
, slp_tree slp_node
)
5059 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5060 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5062 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5063 enum tree_code code
;
5064 machine_mode vec_mode
;
5068 bool target_support_p
;
5070 enum vect_def_type dt
[3]
5071 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5072 gimple
*new_stmt
= NULL
;
5073 stmt_vec_info prev_stmt_info
;
5079 vec
<tree
> vec_oprnds0
= vNULL
;
5080 vec
<tree
> vec_oprnds1
= vNULL
;
5081 vec
<tree
> vec_oprnds2
= vNULL
;
5082 tree vop0
, vop1
, vop2
;
5083 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5084 vec_info
*vinfo
= stmt_info
->vinfo
;
5087 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5090 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5094 /* Is STMT a vectorizable binary/unary operation? */
5095 if (!is_gimple_assign (stmt
))
5098 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5101 code
= gimple_assign_rhs_code (stmt
);
5103 /* For pointer addition, we should use the normal plus for
5104 the vector addition. */
5105 if (code
== POINTER_PLUS_EXPR
)
5108 /* Support only unary or binary operations. */
5109 op_type
= TREE_CODE_LENGTH (code
);
5110 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5112 if (dump_enabled_p ())
5113 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5114 "num. args = %d (not unary/binary/ternary op).\n",
5119 scalar_dest
= gimple_assign_lhs (stmt
);
5120 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5122 /* Most operations cannot handle bit-precision types without extra
5124 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5125 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5126 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
5127 /* Exception are bitwise binary operations. */
5128 && code
!= BIT_IOR_EXPR
5129 && code
!= BIT_XOR_EXPR
5130 && code
!= BIT_AND_EXPR
)
5132 if (dump_enabled_p ())
5133 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5134 "bit-precision arithmetic not supported.\n");
5138 op0
= gimple_assign_rhs1 (stmt
);
5139 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5141 if (dump_enabled_p ())
5142 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5143 "use not simple.\n");
5146 /* If op0 is an external or constant def use a vector type with
5147 the same size as the output vector type. */
5150 /* For boolean type we cannot determine vectype by
5151 invariant value (don't know whether it is a vector
5152 of booleans or vector of integers). We use output
5153 vectype because operations on boolean don't change
5155 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
5157 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
5159 if (dump_enabled_p ())
5160 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5161 "not supported operation on bool value.\n");
5164 vectype
= vectype_out
;
5167 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5170 gcc_assert (vectype
);
5173 if (dump_enabled_p ())
5175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5176 "no vectype for scalar type ");
5177 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5179 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5185 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5186 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5187 if (nunits_out
!= nunits_in
)
5190 if (op_type
== binary_op
|| op_type
== ternary_op
)
5192 op1
= gimple_assign_rhs2 (stmt
);
5193 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5195 if (dump_enabled_p ())
5196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5197 "use not simple.\n");
5201 if (op_type
== ternary_op
)
5203 op2
= gimple_assign_rhs3 (stmt
);
5204 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5208 "use not simple.\n");
5214 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5218 /* Multiple types in SLP are handled by creating the appropriate number of
5219 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5224 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
5226 gcc_assert (ncopies
>= 1);
5228 /* Shifts are handled in vectorizable_shift (). */
5229 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5230 || code
== RROTATE_EXPR
)
5233 /* Supportable by target? */
5235 vec_mode
= TYPE_MODE (vectype
);
5236 if (code
== MULT_HIGHPART_EXPR
)
5237 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5240 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5243 if (dump_enabled_p ())
5244 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5248 target_support_p
= (optab_handler (optab
, vec_mode
)
5249 != CODE_FOR_nothing
);
5252 if (!target_support_p
)
5254 if (dump_enabled_p ())
5255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5256 "op not supported by target.\n");
5257 /* Check only during analysis. */
5258 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5259 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_NOTE
, vect_location
,
5263 "proceeding using word mode.\n");
5266 /* Worthwhile without SIMD support? Check only during analysis. */
5267 if (!VECTOR_MODE_P (vec_mode
)
5269 && vf
< vect_min_worthwhile_factor (code
))
5271 if (dump_enabled_p ())
5272 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5273 "not worthwhile without SIMD support.\n");
5277 if (!vec_stmt
) /* transformation not required. */
5279 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_NOTE
, vect_location
,
5282 "=== vectorizable_operation ===\n");
5283 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5289 if (dump_enabled_p ())
5290 dump_printf_loc (MSG_NOTE
, vect_location
,
5291 "transform binary/unary operation.\n");
5294 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5296 /* In case the vectorization factor (VF) is bigger than the number
5297 of elements that we can fit in a vectype (nunits), we have to generate
5298 more than one vector stmt - i.e - we need to "unroll" the
5299 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5300 from one copy of the vector stmt to the next, in the field
5301 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5302 stages to find the correct vector defs to be used when vectorizing
5303 stmts that use the defs of the current stmt. The example below
5304 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5305 we need to create 4 vectorized stmts):
5307 before vectorization:
5308 RELATED_STMT VEC_STMT
5312 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5314 RELATED_STMT VEC_STMT
5315 VS1_0: vx0 = memref0 VS1_1 -
5316 VS1_1: vx1 = memref1 VS1_2 -
5317 VS1_2: vx2 = memref2 VS1_3 -
5318 VS1_3: vx3 = memref3 - -
5319 S1: x = load - VS1_0
5322 step2: vectorize stmt S2 (done here):
5323 To vectorize stmt S2 we first need to find the relevant vector
5324 def for the first operand 'x'. This is, as usual, obtained from
5325 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5326 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5327 relevant vector def 'vx0'. Having found 'vx0' we can generate
5328 the vector stmt VS2_0, and as usual, record it in the
5329 STMT_VINFO_VEC_STMT of stmt S2.
5330 When creating the second copy (VS2_1), we obtain the relevant vector
5331 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5332 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5333 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5334 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5335 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5336 chain of stmts and pointers:
5337 RELATED_STMT VEC_STMT
5338 VS1_0: vx0 = memref0 VS1_1 -
5339 VS1_1: vx1 = memref1 VS1_2 -
5340 VS1_2: vx2 = memref2 VS1_3 -
5341 VS1_3: vx3 = memref3 - -
5342 S1: x = load - VS1_0
5343 VS2_0: vz0 = vx0 + v1 VS2_1 -
5344 VS2_1: vz1 = vx1 + v1 VS2_2 -
5345 VS2_2: vz2 = vx2 + v1 VS2_3 -
5346 VS2_3: vz3 = vx3 + v1 - -
5347 S2: z = x + 1 - VS2_0 */
5349 prev_stmt_info
= NULL
;
5350 for (j
= 0; j
< ncopies
; j
++)
5355 if (op_type
== binary_op
|| op_type
== ternary_op
)
5356 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5359 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5361 if (op_type
== ternary_op
)
5362 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5367 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5368 if (op_type
== ternary_op
)
5370 tree vec_oprnd
= vec_oprnds2
.pop ();
5371 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5376 /* Arguments are ready. Create the new vector stmt. */
5377 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5379 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5380 ? vec_oprnds1
[i
] : NULL_TREE
);
5381 vop2
= ((op_type
== ternary_op
)
5382 ? vec_oprnds2
[i
] : NULL_TREE
);
5383 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5384 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5385 gimple_assign_set_lhs (new_stmt
, new_temp
);
5386 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5388 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5395 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5397 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5398 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5401 vec_oprnds0
.release ();
5402 vec_oprnds1
.release ();
5403 vec_oprnds2
.release ();
5408 /* A helper function to ensure data reference DR's base alignment
5412 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5417 if (DR_VECT_AUX (dr
)->base_misaligned
)
5419 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5420 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5422 if (decl_in_symtab_p (base_decl
))
5423 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5426 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5427 DECL_USER_ALIGN (base_decl
) = 1;
5429 DR_VECT_AUX (dr
)->base_misaligned
= false;
5434 /* Function get_group_alias_ptr_type.
5436 Return the alias type for the group starting at FIRST_STMT. */
5439 get_group_alias_ptr_type (gimple
*first_stmt
)
5441 struct data_reference
*first_dr
, *next_dr
;
5444 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5445 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5448 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5449 if (get_alias_set (DR_REF (first_dr
))
5450 != get_alias_set (DR_REF (next_dr
)))
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_NOTE
, vect_location
,
5454 "conflicting alias set types.\n");
5455 return ptr_type_node
;
5457 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5459 return reference_alias_ptr_type (DR_REF (first_dr
));
5463 /* Function vectorizable_store.
5465 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5467 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5468 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5469 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5472 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5478 tree vec_oprnd
= NULL_TREE
;
5479 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5480 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5482 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5483 struct loop
*loop
= NULL
;
5484 machine_mode vec_mode
;
5486 enum dr_alignment_support alignment_support_scheme
;
5488 enum vect_def_type dt
;
5489 stmt_vec_info prev_stmt_info
= NULL
;
5490 tree dataref_ptr
= NULL_TREE
;
5491 tree dataref_offset
= NULL_TREE
;
5492 gimple
*ptr_incr
= NULL
;
5495 gimple
*next_stmt
, *first_stmt
;
5497 unsigned int group_size
, i
;
5498 vec
<tree
> oprnds
= vNULL
;
5499 vec
<tree
> result_chain
= vNULL
;
5501 tree offset
= NULL_TREE
;
5502 vec
<tree
> vec_oprnds
= vNULL
;
5503 bool slp
= (slp_node
!= NULL
);
5504 unsigned int vec_num
;
5505 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5506 vec_info
*vinfo
= stmt_info
->vinfo
;
5508 gather_scatter_info gs_info
;
5509 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5512 vec_load_store_type vls_type
;
5515 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5518 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5522 /* Is vectorizable store? */
5524 if (!is_gimple_assign (stmt
))
5527 scalar_dest
= gimple_assign_lhs (stmt
);
5528 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5529 && is_pattern_stmt_p (stmt_info
))
5530 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5531 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5532 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5533 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5534 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5535 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5536 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5537 && TREE_CODE (scalar_dest
) != MEM_REF
)
5540 /* Cannot have hybrid store SLP -- that would mean storing to the
5541 same location twice. */
5542 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5544 gcc_assert (gimple_assign_single_p (stmt
));
5546 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5547 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5551 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5552 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5557 /* Multiple types in SLP are handled by creating the appropriate number of
5558 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5563 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5565 gcc_assert (ncopies
>= 1);
5567 /* FORNOW. This restriction should be relaxed. */
5568 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5570 if (dump_enabled_p ())
5571 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5572 "multiple types in nested loop.\n");
5576 op
= gimple_assign_rhs1 (stmt
);
5578 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5582 "use not simple.\n");
5586 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5587 vls_type
= VLS_STORE_INVARIANT
;
5589 vls_type
= VLS_STORE
;
5591 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5594 elem_type
= TREE_TYPE (vectype
);
5595 vec_mode
= TYPE_MODE (vectype
);
5597 /* FORNOW. In some cases can vectorize even if data-type not supported
5598 (e.g. - array initialization with 0). */
5599 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5602 if (!STMT_VINFO_DATA_REF (stmt_info
))
5605 vect_memory_access_type memory_access_type
;
5606 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5607 &memory_access_type
, &gs_info
))
5610 if (!vec_stmt
) /* transformation not required. */
5612 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5613 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5614 /* The SLP costs are calculated during SLP analysis. */
5615 if (!PURE_SLP_STMT (stmt_info
))
5616 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5620 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5624 ensure_base_align (stmt_info
, dr
);
5626 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5628 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5629 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5630 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5631 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5632 edge pe
= loop_preheader_edge (loop
);
5635 enum { NARROW
, NONE
, WIDEN
} modifier
;
5636 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5638 if (nunits
== (unsigned int) scatter_off_nunits
)
5640 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5642 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5645 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5646 sel
[i
] = i
| nunits
;
5648 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
5649 gcc_assert (perm_mask
!= NULL_TREE
);
5651 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5653 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5656 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5657 sel
[i
] = i
| scatter_off_nunits
;
5659 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5660 gcc_assert (perm_mask
!= NULL_TREE
);
5666 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5667 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5668 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5669 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5670 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5671 scaletype
= TREE_VALUE (arglist
);
5673 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5674 && TREE_CODE (rettype
) == VOID_TYPE
);
5676 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5677 if (!is_gimple_min_invariant (ptr
))
5679 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5680 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5681 gcc_assert (!new_bb
);
5684 /* Currently we support only unconditional scatter stores,
5685 so mask should be all ones. */
5686 mask
= build_int_cst (masktype
, -1);
5687 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5689 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5691 prev_stmt_info
= NULL
;
5692 for (j
= 0; j
< ncopies
; ++j
)
5697 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5699 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5701 else if (modifier
!= NONE
&& (j
& 1))
5703 if (modifier
== WIDEN
)
5706 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5707 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5710 else if (modifier
== NARROW
)
5712 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5715 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5724 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5726 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5730 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5732 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5733 == TYPE_VECTOR_SUBPARTS (srctype
));
5734 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5735 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5736 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5737 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5741 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5743 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5744 == TYPE_VECTOR_SUBPARTS (idxtype
));
5745 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5746 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5747 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5748 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5753 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5755 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5757 if (prev_stmt_info
== NULL
)
5758 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5760 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5761 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5766 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5769 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5770 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5771 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5773 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5776 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5778 /* We vectorize all the stmts of the interleaving group when we
5779 reach the last stmt in the group. */
5780 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5781 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5790 grouped_store
= false;
5791 /* VEC_NUM is the number of vect stmts to be created for this
5793 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5794 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5795 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5796 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5797 op
= gimple_assign_rhs1 (first_stmt
);
5800 /* VEC_NUM is the number of vect stmts to be created for this
5802 vec_num
= group_size
;
5804 ref_type
= get_group_alias_ptr_type (first_stmt
);
5810 group_size
= vec_num
= 1;
5811 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
5814 if (dump_enabled_p ())
5815 dump_printf_loc (MSG_NOTE
, vect_location
,
5816 "transform store. ncopies = %d\n", ncopies
);
5818 if (memory_access_type
== VMAT_ELEMENTWISE
5819 || memory_access_type
== VMAT_STRIDED_SLP
)
5821 gimple_stmt_iterator incr_gsi
;
5827 gimple_seq stmts
= NULL
;
5828 tree stride_base
, stride_step
, alias_off
;
5832 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5835 = fold_build_pointer_plus
5836 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5837 size_binop (PLUS_EXPR
,
5838 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5839 convert_to_ptrofftype (DR_INIT (first_dr
))));
5840 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5842 /* For a store with loop-invariant (but other than power-of-2)
5843 stride (i.e. not a grouped access) like so:
5845 for (i = 0; i < n; i += stride)
5848 we generate a new induction variable and new stores from
5849 the components of the (vectorized) rhs:
5851 for (j = 0; ; j += VF*stride)
5856 array[j + stride] = tmp2;
5860 unsigned nstores
= nunits
;
5862 tree ltype
= elem_type
;
5865 if (group_size
< nunits
5866 && nunits
% group_size
== 0)
5868 nstores
= nunits
/ group_size
;
5870 ltype
= build_vector_type (elem_type
, group_size
);
5872 else if (group_size
>= nunits
5873 && group_size
% nunits
== 0)
5879 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5880 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5883 ivstep
= stride_step
;
5884 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5885 build_int_cst (TREE_TYPE (ivstep
), vf
));
5887 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5889 create_iv (stride_base
, ivstep
, NULL
,
5890 loop
, &incr_gsi
, insert_after
,
5892 incr
= gsi_stmt (incr_gsi
);
5893 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5895 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5897 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5899 prev_stmt_info
= NULL
;
5900 alias_off
= build_int_cst (ref_type
, 0);
5901 next_stmt
= first_stmt
;
5902 for (g
= 0; g
< group_size
; g
++)
5904 running_off
= offvar
;
5907 tree size
= TYPE_SIZE_UNIT (ltype
);
5908 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5910 tree newoff
= copy_ssa_name (running_off
, NULL
);
5911 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5913 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5914 running_off
= newoff
;
5916 unsigned int group_el
= 0;
5917 unsigned HOST_WIDE_INT
5918 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
5919 for (j
= 0; j
< ncopies
; j
++)
5921 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5922 and first_stmt == stmt. */
5927 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5929 vec_oprnd
= vec_oprnds
[0];
5933 gcc_assert (gimple_assign_single_p (next_stmt
));
5934 op
= gimple_assign_rhs1 (next_stmt
);
5935 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5941 vec_oprnd
= vec_oprnds
[j
];
5944 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5945 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5949 for (i
= 0; i
< nstores
; i
++)
5951 tree newref
, newoff
;
5952 gimple
*incr
, *assign
;
5953 tree size
= TYPE_SIZE (ltype
);
5954 /* Extract the i'th component. */
5955 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5956 bitsize_int (i
), size
);
5957 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5960 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5964 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
5966 newref
= build2 (MEM_REF
, ltype
,
5967 running_off
, this_off
);
5969 /* And store it to *running_off. */
5970 assign
= gimple_build_assign (newref
, elem
);
5971 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5975 || group_el
== group_size
)
5977 newoff
= copy_ssa_name (running_off
, NULL
);
5978 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5979 running_off
, stride_step
);
5980 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5982 running_off
= newoff
;
5985 if (g
== group_size
- 1
5988 if (j
== 0 && i
== 0)
5989 STMT_VINFO_VEC_STMT (stmt_info
)
5990 = *vec_stmt
= assign
;
5992 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5993 prev_stmt_info
= vinfo_for_stmt (assign
);
5997 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6004 auto_vec
<tree
> dr_chain (group_size
);
6005 oprnds
.create (group_size
);
6007 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6008 gcc_assert (alignment_support_scheme
);
6009 /* Targets with store-lane instructions must not require explicit
6011 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6012 || alignment_support_scheme
== dr_aligned
6013 || alignment_support_scheme
== dr_unaligned_supported
);
6015 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6016 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6017 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6019 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6020 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6022 aggr_type
= vectype
;
6024 /* In case the vectorization factor (VF) is bigger than the number
6025 of elements that we can fit in a vectype (nunits), we have to generate
6026 more than one vector stmt - i.e - we need to "unroll" the
6027 vector stmt by a factor VF/nunits. For more details see documentation in
6028 vect_get_vec_def_for_copy_stmt. */
6030 /* In case of interleaving (non-unit grouped access):
6037 We create vectorized stores starting from base address (the access of the
6038 first stmt in the chain (S2 in the above example), when the last store stmt
6039 of the chain (S4) is reached:
6042 VS2: &base + vec_size*1 = vx0
6043 VS3: &base + vec_size*2 = vx1
6044 VS4: &base + vec_size*3 = vx3
6046 Then permutation statements are generated:
6048 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6049 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6052 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6053 (the order of the data-refs in the output of vect_permute_store_chain
6054 corresponds to the order of scalar stmts in the interleaving chain - see
6055 the documentation of vect_permute_store_chain()).
6057 In case of both multiple types and interleaving, above vector stores and
6058 permutation stmts are created for every copy. The result vector stmts are
6059 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6060 STMT_VINFO_RELATED_STMT for the next copies.
6063 prev_stmt_info
= NULL
;
6064 for (j
= 0; j
< ncopies
; j
++)
6071 /* Get vectorized arguments for SLP_NODE. */
6072 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6073 NULL
, slp_node
, -1);
6075 vec_oprnd
= vec_oprnds
[0];
6079 /* For interleaved stores we collect vectorized defs for all the
6080 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6081 used as an input to vect_permute_store_chain(), and OPRNDS as
6082 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6084 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6085 OPRNDS are of size 1. */
6086 next_stmt
= first_stmt
;
6087 for (i
= 0; i
< group_size
; i
++)
6089 /* Since gaps are not supported for interleaved stores,
6090 GROUP_SIZE is the exact number of stmts in the chain.
6091 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6092 there is no interleaving, GROUP_SIZE is 1, and only one
6093 iteration of the loop will be executed. */
6094 gcc_assert (next_stmt
6095 && gimple_assign_single_p (next_stmt
));
6096 op
= gimple_assign_rhs1 (next_stmt
);
6098 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6099 dr_chain
.quick_push (vec_oprnd
);
6100 oprnds
.quick_push (vec_oprnd
);
6101 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6105 /* We should have catched mismatched types earlier. */
6106 gcc_assert (useless_type_conversion_p (vectype
,
6107 TREE_TYPE (vec_oprnd
)));
6108 bool simd_lane_access_p
6109 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6110 if (simd_lane_access_p
6111 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6112 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6113 && integer_zerop (DR_OFFSET (first_dr
))
6114 && integer_zerop (DR_INIT (first_dr
))
6115 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6116 get_alias_set (TREE_TYPE (ref_type
))))
6118 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6119 dataref_offset
= build_int_cst (ref_type
, 0);
6124 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6125 simd_lane_access_p
? loop
: NULL
,
6126 offset
, &dummy
, gsi
, &ptr_incr
,
6127 simd_lane_access_p
, &inv_p
);
6128 gcc_assert (bb_vinfo
|| !inv_p
);
6132 /* For interleaved stores we created vectorized defs for all the
6133 defs stored in OPRNDS in the previous iteration (previous copy).
6134 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6135 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6137 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6138 OPRNDS are of size 1. */
6139 for (i
= 0; i
< group_size
; i
++)
6142 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6143 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6144 dr_chain
[i
] = vec_oprnd
;
6145 oprnds
[i
] = vec_oprnd
;
6149 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6150 TYPE_SIZE_UNIT (aggr_type
));
6152 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6153 TYPE_SIZE_UNIT (aggr_type
));
6156 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6160 /* Combine all the vectors into an array. */
6161 vec_array
= create_vector_array (vectype
, vec_num
);
6162 for (i
= 0; i
< vec_num
; i
++)
6164 vec_oprnd
= dr_chain
[i
];
6165 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6169 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6170 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6171 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
6172 gimple_call_set_lhs (new_stmt
, data_ref
);
6173 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6181 result_chain
.create (group_size
);
6183 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6187 next_stmt
= first_stmt
;
6188 for (i
= 0; i
< vec_num
; i
++)
6190 unsigned align
, misalign
;
6193 /* Bump the vector pointer. */
6194 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6198 vec_oprnd
= vec_oprnds
[i
];
6199 else if (grouped_store
)
6200 /* For grouped stores vectorized defs are interleaved in
6201 vect_permute_store_chain(). */
6202 vec_oprnd
= result_chain
[i
];
6204 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
6208 : build_int_cst (ref_type
, 0));
6209 align
= TYPE_ALIGN_UNIT (vectype
);
6210 if (aligned_access_p (first_dr
))
6212 else if (DR_MISALIGNMENT (first_dr
) == -1)
6214 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6215 align
= TYPE_ALIGN_UNIT (elem_type
);
6217 align
= get_object_alignment (DR_REF (first_dr
))
6220 TREE_TYPE (data_ref
)
6221 = build_aligned_type (TREE_TYPE (data_ref
),
6222 align
* BITS_PER_UNIT
);
6226 TREE_TYPE (data_ref
)
6227 = build_aligned_type (TREE_TYPE (data_ref
),
6228 TYPE_ALIGN (elem_type
));
6229 misalign
= DR_MISALIGNMENT (first_dr
);
6231 if (dataref_offset
== NULL_TREE
6232 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6233 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6236 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6238 tree perm_mask
= perm_mask_for_reverse (vectype
);
6240 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6242 tree new_temp
= make_ssa_name (perm_dest
);
6244 /* Generate the permute statement. */
6246 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6247 vec_oprnd
, perm_mask
);
6248 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6250 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6251 vec_oprnd
= new_temp
;
6254 /* Arguments are ready. Create the new vector stmt. */
6255 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6256 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6261 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6269 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6271 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6272 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6277 result_chain
.release ();
6278 vec_oprnds
.release ();
6283 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6284 VECTOR_CST mask. No checks are made that the target platform supports the
6285 mask, so callers may wish to test can_vec_perm_p separately, or use
6286 vect_gen_perm_mask_checked. */
6289 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6291 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
6294 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6296 mask_elt_type
= lang_hooks
.types
.type_for_mode
6297 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
6298 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6300 mask_elts
= XALLOCAVEC (tree
, nunits
);
6301 for (i
= nunits
- 1; i
>= 0; i
--)
6302 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
6303 mask_vec
= build_vector (mask_type
, mask_elts
);
6308 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6309 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6312 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
6314 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6315 return vect_gen_perm_mask_any (vectype
, sel
);
6318 /* Given a vector variable X and Y, that was generated for the scalar
6319 STMT, generate instructions to permute the vector elements of X and Y
6320 using permutation mask MASK_VEC, insert them at *GSI and return the
6321 permuted vector variable. */
6324 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6325 gimple_stmt_iterator
*gsi
)
6327 tree vectype
= TREE_TYPE (x
);
6328 tree perm_dest
, data_ref
;
6331 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6332 data_ref
= make_ssa_name (perm_dest
);
6334 /* Generate the permute statement. */
6335 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6336 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6341 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6342 inserting them on the loops preheader edge. Returns true if we
6343 were successful in doing so (and thus STMT can be moved then),
6344 otherwise returns false. */
6347 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6353 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6355 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6356 if (!gimple_nop_p (def_stmt
)
6357 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6359 /* Make sure we don't need to recurse. While we could do
6360 so in simple cases when there are more complex use webs
6361 we don't have an easy way to preserve stmt order to fulfil
6362 dependencies within them. */
6365 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6367 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6369 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6370 if (!gimple_nop_p (def_stmt2
)
6371 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6381 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6383 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6384 if (!gimple_nop_p (def_stmt
)
6385 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6387 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6388 gsi_remove (&gsi
, false);
6389 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6396 /* vectorizable_load.
6398 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6405 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6406 slp_tree slp_node
, slp_instance slp_node_instance
)
6409 tree vec_dest
= NULL
;
6410 tree data_ref
= NULL
;
6411 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6412 stmt_vec_info prev_stmt_info
;
6413 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6414 struct loop
*loop
= NULL
;
6415 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6416 bool nested_in_vect_loop
= false;
6417 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6421 gimple
*new_stmt
= NULL
;
6423 enum dr_alignment_support alignment_support_scheme
;
6424 tree dataref_ptr
= NULL_TREE
;
6425 tree dataref_offset
= NULL_TREE
;
6426 gimple
*ptr_incr
= NULL
;
6428 int i
, j
, group_size
, group_gap_adj
;
6429 tree msq
= NULL_TREE
, lsq
;
6430 tree offset
= NULL_TREE
;
6431 tree byte_offset
= NULL_TREE
;
6432 tree realignment_token
= NULL_TREE
;
6434 vec
<tree
> dr_chain
= vNULL
;
6435 bool grouped_load
= false;
6437 gimple
*first_stmt_for_drptr
= NULL
;
6439 bool compute_in_loop
= false;
6440 struct loop
*at_loop
;
6442 bool slp
= (slp_node
!= NULL
);
6443 bool slp_perm
= false;
6444 enum tree_code code
;
6445 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6448 gather_scatter_info gs_info
;
6449 vec_info
*vinfo
= stmt_info
->vinfo
;
6452 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6455 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6459 /* Is vectorizable load? */
6460 if (!is_gimple_assign (stmt
))
6463 scalar_dest
= gimple_assign_lhs (stmt
);
6464 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6467 code
= gimple_assign_rhs_code (stmt
);
6468 if (code
!= ARRAY_REF
6469 && code
!= BIT_FIELD_REF
6470 && code
!= INDIRECT_REF
6471 && code
!= COMPONENT_REF
6472 && code
!= IMAGPART_EXPR
6473 && code
!= REALPART_EXPR
6475 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6478 if (!STMT_VINFO_DATA_REF (stmt_info
))
6481 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6482 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6486 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6487 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6488 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6493 /* Multiple types in SLP are handled by creating the appropriate number of
6494 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6499 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6501 gcc_assert (ncopies
>= 1);
6503 /* FORNOW. This restriction should be relaxed. */
6504 if (nested_in_vect_loop
&& ncopies
> 1)
6506 if (dump_enabled_p ())
6507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6508 "multiple types in nested loop.\n");
6512 /* Invalidate assumptions made by dependence analysis when vectorization
6513 on the unrolled body effectively re-orders stmts. */
6515 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6516 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6517 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6519 if (dump_enabled_p ())
6520 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6521 "cannot perform implicit CSE when unrolling "
6522 "with negative dependence distance\n");
6526 elem_type
= TREE_TYPE (vectype
);
6527 mode
= TYPE_MODE (vectype
);
6529 /* FORNOW. In some cases can vectorize even if data-type not supported
6530 (e.g. - data copies). */
6531 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6533 if (dump_enabled_p ())
6534 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6535 "Aligned load, but unsupported type.\n");
6539 /* Check if the load is a part of an interleaving chain. */
6540 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6542 grouped_load
= true;
6544 gcc_assert (!nested_in_vect_loop
);
6545 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6547 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6548 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6550 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6553 /* ??? The following is overly pessimistic (as well as the loop
6554 case above) in the case we can statically determine the excess
6555 elements loaded are within the bounds of a decl that is accessed.
6556 Likewise for BB vectorizations using masked loads is a possibility. */
6557 if (bb_vinfo
&& slp_perm
&& group_size
% nunits
!= 0)
6559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6560 "BB vectorization with gaps at the end of a load "
6561 "is not supported\n");
6565 /* Invalidate assumptions made by dependence analysis when vectorization
6566 on the unrolled body effectively re-orders stmts. */
6567 if (!PURE_SLP_STMT (stmt_info
)
6568 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6569 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6570 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6572 if (dump_enabled_p ())
6573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6574 "cannot perform implicit CSE when performing "
6575 "group loads with negative dependence distance\n");
6579 /* Similarly when the stmt is a load that is both part of a SLP
6580 instance and a loop vectorized stmt via the same-dr mechanism
6581 we have to give up. */
6582 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6583 && (STMT_SLP_TYPE (stmt_info
)
6584 != STMT_SLP_TYPE (vinfo_for_stmt
6585 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6587 if (dump_enabled_p ())
6588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6589 "conflicting SLP types for CSEd load\n");
6594 vect_memory_access_type memory_access_type
;
6595 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6596 &memory_access_type
, &gs_info
))
6599 if (!vec_stmt
) /* transformation not required. */
6602 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6603 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6604 /* The SLP costs are calculated during SLP analysis. */
6605 if (!PURE_SLP_STMT (stmt_info
))
6606 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6612 gcc_assert (memory_access_type
6613 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6615 if (dump_enabled_p ())
6616 dump_printf_loc (MSG_NOTE
, vect_location
,
6617 "transform load. ncopies = %d\n", ncopies
);
6621 ensure_base_align (stmt_info
, dr
);
6623 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6625 tree vec_oprnd0
= NULL_TREE
, op
;
6626 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6627 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6628 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6629 edge pe
= loop_preheader_edge (loop
);
6632 enum { NARROW
, NONE
, WIDEN
} modifier
;
6633 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6635 if (nunits
== gather_off_nunits
)
6637 else if (nunits
== gather_off_nunits
/ 2)
6639 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6642 for (i
= 0; i
< gather_off_nunits
; ++i
)
6643 sel
[i
] = i
| nunits
;
6645 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
, sel
);
6647 else if (nunits
== gather_off_nunits
* 2)
6649 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6652 for (i
= 0; i
< nunits
; ++i
)
6653 sel
[i
] = i
< gather_off_nunits
6654 ? i
: i
+ nunits
- gather_off_nunits
;
6656 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6662 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6663 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6664 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6665 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6666 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6667 scaletype
= TREE_VALUE (arglist
);
6668 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6670 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6672 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6673 if (!is_gimple_min_invariant (ptr
))
6675 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6676 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6677 gcc_assert (!new_bb
);
6680 /* Currently we support only unconditional gather loads,
6681 so mask should be all ones. */
6682 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6683 mask
= build_int_cst (masktype
, -1);
6684 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6686 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6687 mask
= build_vector_from_val (masktype
, mask
);
6688 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6690 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6694 for (j
= 0; j
< 6; ++j
)
6696 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6697 mask
= build_real (TREE_TYPE (masktype
), r
);
6698 mask
= build_vector_from_val (masktype
, mask
);
6699 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6704 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6706 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6707 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6708 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6712 for (j
= 0; j
< 6; ++j
)
6714 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6715 merge
= build_real (TREE_TYPE (rettype
), r
);
6719 merge
= build_vector_from_val (rettype
, merge
);
6720 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6722 prev_stmt_info
= NULL
;
6723 for (j
= 0; j
< ncopies
; ++j
)
6725 if (modifier
== WIDEN
&& (j
& 1))
6726 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6727 perm_mask
, stmt
, gsi
);
6730 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6733 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6735 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6737 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6738 == TYPE_VECTOR_SUBPARTS (idxtype
));
6739 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6740 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6742 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6743 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6748 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6750 if (!useless_type_conversion_p (vectype
, rettype
))
6752 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6753 == TYPE_VECTOR_SUBPARTS (rettype
));
6754 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6755 gimple_call_set_lhs (new_stmt
, op
);
6756 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6757 var
= make_ssa_name (vec_dest
);
6758 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6760 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6764 var
= make_ssa_name (vec_dest
, new_stmt
);
6765 gimple_call_set_lhs (new_stmt
, var
);
6768 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6770 if (modifier
== NARROW
)
6777 var
= permute_vec_elements (prev_res
, var
,
6778 perm_mask
, stmt
, gsi
);
6779 new_stmt
= SSA_NAME_DEF_STMT (var
);
6782 if (prev_stmt_info
== NULL
)
6783 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6785 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6786 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6791 if (memory_access_type
== VMAT_ELEMENTWISE
6792 || memory_access_type
== VMAT_STRIDED_SLP
)
6794 gimple_stmt_iterator incr_gsi
;
6800 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6801 gimple_seq stmts
= NULL
;
6802 tree stride_base
, stride_step
, alias_off
;
6804 gcc_assert (!nested_in_vect_loop
);
6806 if (slp
&& grouped_load
)
6808 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6809 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6810 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6811 ref_type
= get_group_alias_ptr_type (first_stmt
);
6818 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6822 = fold_build_pointer_plus
6823 (DR_BASE_ADDRESS (first_dr
),
6824 size_binop (PLUS_EXPR
,
6825 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6826 convert_to_ptrofftype (DR_INIT (first_dr
))));
6827 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6829 /* For a load with loop-invariant (but other than power-of-2)
6830 stride (i.e. not a grouped access) like so:
6832 for (i = 0; i < n; i += stride)
6835 we generate a new induction variable and new accesses to
6836 form a new vector (or vectors, depending on ncopies):
6838 for (j = 0; ; j += VF*stride)
6840 tmp2 = array[j + stride];
6842 vectemp = {tmp1, tmp2, ...}
6845 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6846 build_int_cst (TREE_TYPE (stride_step
), vf
));
6848 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6850 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6851 loop
, &incr_gsi
, insert_after
,
6853 incr
= gsi_stmt (incr_gsi
);
6854 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6856 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6857 &stmts
, true, NULL_TREE
);
6859 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6861 prev_stmt_info
= NULL
;
6862 running_off
= offvar
;
6863 alias_off
= build_int_cst (ref_type
, 0);
6864 int nloads
= nunits
;
6866 tree ltype
= TREE_TYPE (vectype
);
6867 auto_vec
<tree
> dr_chain
;
6868 if (memory_access_type
== VMAT_STRIDED_SLP
)
6870 nloads
= nunits
/ group_size
;
6871 if (group_size
< nunits
)
6874 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6881 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6885 /* For SLP permutation support we need to load the whole group,
6886 not only the number of vector stmts the permutation result
6890 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
6891 dr_chain
.create (ncopies
);
6894 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6897 unsigned HOST_WIDE_INT
6898 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6899 for (j
= 0; j
< ncopies
; j
++)
6902 vec_alloc (v
, nloads
);
6903 for (i
= 0; i
< nloads
; i
++)
6905 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6907 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6908 build2 (MEM_REF
, ltype
,
6909 running_off
, this_off
));
6910 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6912 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
6913 gimple_assign_lhs (new_stmt
));
6917 || group_el
== group_size
)
6919 tree newoff
= copy_ssa_name (running_off
);
6920 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6921 running_off
, stride_step
);
6922 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6924 running_off
= newoff
;
6930 tree vec_inv
= build_constructor (vectype
, v
);
6931 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6932 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6938 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6940 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6945 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6947 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6948 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6952 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6953 slp_node_instance
, false);
6959 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6960 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6961 /* For SLP vectorization we directly vectorize a subchain
6962 without permutation. */
6963 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6964 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6965 /* For BB vectorization always use the first stmt to base
6966 the data ref pointer on. */
6968 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6970 /* Check if the chain of loads is already vectorized. */
6971 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6972 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6973 ??? But we can only do so if there is exactly one
6974 as we have no way to get at the rest. Leave the CSE
6976 ??? With the group load eventually participating
6977 in multiple different permutations (having multiple
6978 slp nodes which refer to the same group) the CSE
6979 is even wrong code. See PR56270. */
6982 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6985 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6988 /* VEC_NUM is the number of vect stmts to be created for this group. */
6991 grouped_load
= false;
6992 /* For SLP permutation support we need to load the whole group,
6993 not only the number of vector stmts the permutation result
6996 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6998 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6999 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7002 vec_num
= group_size
;
7004 ref_type
= get_group_alias_ptr_type (first_stmt
);
7010 group_size
= vec_num
= 1;
7012 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7015 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7016 gcc_assert (alignment_support_scheme
);
7017 /* Targets with load-lane instructions must not require explicit
7019 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7020 || alignment_support_scheme
== dr_aligned
7021 || alignment_support_scheme
== dr_unaligned_supported
);
7023 /* In case the vectorization factor (VF) is bigger than the number
7024 of elements that we can fit in a vectype (nunits), we have to generate
7025 more than one vector stmt - i.e - we need to "unroll" the
7026 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7027 from one copy of the vector stmt to the next, in the field
7028 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7029 stages to find the correct vector defs to be used when vectorizing
7030 stmts that use the defs of the current stmt. The example below
7031 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7032 need to create 4 vectorized stmts):
7034 before vectorization:
7035 RELATED_STMT VEC_STMT
7039 step 1: vectorize stmt S1:
7040 We first create the vector stmt VS1_0, and, as usual, record a
7041 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7042 Next, we create the vector stmt VS1_1, and record a pointer to
7043 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7044 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7046 RELATED_STMT VEC_STMT
7047 VS1_0: vx0 = memref0 VS1_1 -
7048 VS1_1: vx1 = memref1 VS1_2 -
7049 VS1_2: vx2 = memref2 VS1_3 -
7050 VS1_3: vx3 = memref3 - -
7051 S1: x = load - VS1_0
7054 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7055 information we recorded in RELATED_STMT field is used to vectorize
7058 /* In case of interleaving (non-unit grouped access):
7065 Vectorized loads are created in the order of memory accesses
7066 starting from the access of the first stmt of the chain:
7069 VS2: vx1 = &base + vec_size*1
7070 VS3: vx3 = &base + vec_size*2
7071 VS4: vx4 = &base + vec_size*3
7073 Then permutation statements are generated:
7075 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7076 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7079 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7080 (the order of the data-refs in the output of vect_permute_load_chain
7081 corresponds to the order of scalar stmts in the interleaving chain - see
7082 the documentation of vect_permute_load_chain()).
7083 The generation of permutation stmts and recording them in
7084 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7086 In case of both multiple types and interleaving, the vector loads and
7087 permutation stmts above are created for every copy. The result vector
7088 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7089 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7091 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7092 on a target that supports unaligned accesses (dr_unaligned_supported)
7093 we generate the following code:
7097 p = p + indx * vectype_size;
7102 Otherwise, the data reference is potentially unaligned on a target that
7103 does not support unaligned accesses (dr_explicit_realign_optimized) -
7104 then generate the following code, in which the data in each iteration is
7105 obtained by two vector loads, one from the previous iteration, and one
7106 from the current iteration:
7108 msq_init = *(floor(p1))
7109 p2 = initial_addr + VS - 1;
7110 realignment_token = call target_builtin;
7113 p2 = p2 + indx * vectype_size
7115 vec_dest = realign_load (msq, lsq, realignment_token)
7120 /* If the misalignment remains the same throughout the execution of the
7121 loop, we can create the init_addr and permutation mask at the loop
7122 preheader. Otherwise, it needs to be created inside the loop.
7123 This can only occur when vectorizing memory accesses in the inner-loop
7124 nested within an outer-loop that is being vectorized. */
7126 if (nested_in_vect_loop
7127 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7128 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7130 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7131 compute_in_loop
= true;
7134 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7135 || alignment_support_scheme
== dr_explicit_realign
)
7136 && !compute_in_loop
)
7138 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7139 alignment_support_scheme
, NULL_TREE
,
7141 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7143 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7144 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7151 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7152 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7154 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7155 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7157 aggr_type
= vectype
;
7159 prev_stmt_info
= NULL
;
7160 for (j
= 0; j
< ncopies
; j
++)
7162 /* 1. Create the vector or array pointer update chain. */
7165 bool simd_lane_access_p
7166 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7167 if (simd_lane_access_p
7168 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7169 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7170 && integer_zerop (DR_OFFSET (first_dr
))
7171 && integer_zerop (DR_INIT (first_dr
))
7172 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7173 get_alias_set (TREE_TYPE (ref_type
)))
7174 && (alignment_support_scheme
== dr_aligned
7175 || alignment_support_scheme
== dr_unaligned_supported
))
7177 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7178 dataref_offset
= build_int_cst (ref_type
, 0);
7181 else if (first_stmt_for_drptr
7182 && first_stmt
!= first_stmt_for_drptr
)
7185 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7186 at_loop
, offset
, &dummy
, gsi
,
7187 &ptr_incr
, simd_lane_access_p
,
7188 &inv_p
, byte_offset
);
7189 /* Adjust the pointer by the difference to first_stmt. */
7190 data_reference_p ptrdr
7191 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7192 tree diff
= fold_convert (sizetype
,
7193 size_binop (MINUS_EXPR
,
7196 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7201 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7202 offset
, &dummy
, gsi
, &ptr_incr
,
7203 simd_lane_access_p
, &inv_p
,
7206 else if (dataref_offset
)
7207 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7208 TYPE_SIZE_UNIT (aggr_type
));
7210 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7211 TYPE_SIZE_UNIT (aggr_type
));
7213 if (grouped_load
|| slp_perm
)
7214 dr_chain
.create (vec_num
);
7216 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7220 vec_array
= create_vector_array (vectype
, vec_num
);
7223 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7224 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7225 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7226 gimple_call_set_lhs (new_stmt
, vec_array
);
7227 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7229 /* Extract each vector into an SSA_NAME. */
7230 for (i
= 0; i
< vec_num
; i
++)
7232 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7234 dr_chain
.quick_push (new_temp
);
7237 /* Record the mapping between SSA_NAMEs and statements. */
7238 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7242 for (i
= 0; i
< vec_num
; i
++)
7245 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7248 /* 2. Create the vector-load in the loop. */
7249 switch (alignment_support_scheme
)
7252 case dr_unaligned_supported
:
7254 unsigned int align
, misalign
;
7257 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7260 : build_int_cst (ref_type
, 0));
7261 align
= TYPE_ALIGN_UNIT (vectype
);
7262 if (alignment_support_scheme
== dr_aligned
)
7264 gcc_assert (aligned_access_p (first_dr
));
7267 else if (DR_MISALIGNMENT (first_dr
) == -1)
7269 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7270 align
= TYPE_ALIGN_UNIT (elem_type
);
7272 align
= (get_object_alignment (DR_REF (first_dr
))
7275 TREE_TYPE (data_ref
)
7276 = build_aligned_type (TREE_TYPE (data_ref
),
7277 align
* BITS_PER_UNIT
);
7281 TREE_TYPE (data_ref
)
7282 = build_aligned_type (TREE_TYPE (data_ref
),
7283 TYPE_ALIGN (elem_type
));
7284 misalign
= DR_MISALIGNMENT (first_dr
);
7286 if (dataref_offset
== NULL_TREE
7287 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7288 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7292 case dr_explicit_realign
:
7296 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7298 if (compute_in_loop
)
7299 msq
= vect_setup_realignment (first_stmt
, gsi
,
7301 dr_explicit_realign
,
7304 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7305 ptr
= copy_ssa_name (dataref_ptr
);
7307 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7308 new_stmt
= gimple_build_assign
7309 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7311 (TREE_TYPE (dataref_ptr
),
7312 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7313 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7315 = build2 (MEM_REF
, vectype
, ptr
,
7316 build_int_cst (ref_type
, 0));
7317 vec_dest
= vect_create_destination_var (scalar_dest
,
7319 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7320 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7321 gimple_assign_set_lhs (new_stmt
, new_temp
);
7322 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7323 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7324 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7327 bump
= size_binop (MULT_EXPR
, vs
,
7328 TYPE_SIZE_UNIT (elem_type
));
7329 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7330 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7331 new_stmt
= gimple_build_assign
7332 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7335 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7336 ptr
= copy_ssa_name (ptr
, new_stmt
);
7337 gimple_assign_set_lhs (new_stmt
, ptr
);
7338 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7340 = build2 (MEM_REF
, vectype
, ptr
,
7341 build_int_cst (ref_type
, 0));
7344 case dr_explicit_realign_optimized
:
7345 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7346 new_temp
= copy_ssa_name (dataref_ptr
);
7348 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7349 new_stmt
= gimple_build_assign
7350 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7352 (TREE_TYPE (dataref_ptr
),
7353 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7354 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7356 = build2 (MEM_REF
, vectype
, new_temp
,
7357 build_int_cst (ref_type
, 0));
7362 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7363 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7364 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7365 gimple_assign_set_lhs (new_stmt
, new_temp
);
7366 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7368 /* 3. Handle explicit realignment if necessary/supported.
7370 vec_dest = realign_load (msq, lsq, realignment_token) */
7371 if (alignment_support_scheme
== dr_explicit_realign_optimized
7372 || alignment_support_scheme
== dr_explicit_realign
)
7374 lsq
= gimple_assign_lhs (new_stmt
);
7375 if (!realignment_token
)
7376 realignment_token
= dataref_ptr
;
7377 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7378 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7379 msq
, lsq
, realignment_token
);
7380 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7381 gimple_assign_set_lhs (new_stmt
, new_temp
);
7382 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7384 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7387 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7388 add_phi_arg (phi
, lsq
,
7389 loop_latch_edge (containing_loop
),
7395 /* 4. Handle invariant-load. */
7396 if (inv_p
&& !bb_vinfo
)
7398 gcc_assert (!grouped_load
);
7399 /* If we have versioned for aliasing or the loop doesn't
7400 have any data dependencies that would preclude this,
7401 then we are sure this is a loop invariant load and
7402 thus we can insert it on the preheader edge. */
7403 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7404 && !nested_in_vect_loop
7405 && hoist_defs_of_uses (stmt
, loop
))
7407 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_NOTE
, vect_location
,
7410 "hoisting out of the vectorized "
7412 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7414 tree tem
= copy_ssa_name (scalar_dest
);
7415 gsi_insert_on_edge_immediate
7416 (loop_preheader_edge (loop
),
7417 gimple_build_assign (tem
,
7419 (gimple_assign_rhs1 (stmt
))));
7420 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7421 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7422 set_vinfo_for_stmt (new_stmt
,
7423 new_stmt_vec_info (new_stmt
, vinfo
));
7427 gimple_stmt_iterator gsi2
= *gsi
;
7429 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7431 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7435 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7437 tree perm_mask
= perm_mask_for_reverse (vectype
);
7438 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7439 perm_mask
, stmt
, gsi
);
7440 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7443 /* Collect vector loads and later create their permutation in
7444 vect_transform_grouped_load (). */
7445 if (grouped_load
|| slp_perm
)
7446 dr_chain
.quick_push (new_temp
);
7448 /* Store vector loads in the corresponding SLP_NODE. */
7449 if (slp
&& !slp_perm
)
7450 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7452 /* Bump the vector pointer to account for a gap or for excess
7453 elements loaded for a permuted SLP load. */
7454 if (group_gap_adj
!= 0)
7458 = wide_int_to_tree (sizetype
,
7459 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7460 group_gap_adj
, &ovf
));
7461 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7466 if (slp
&& !slp_perm
)
7471 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7472 slp_node_instance
, false))
7474 dr_chain
.release ();
7482 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7483 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7484 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7489 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7491 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7492 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7495 dr_chain
.release ();
7501 /* Function vect_is_simple_cond.
7504 LOOP - the loop that is being vectorized.
7505 COND - Condition that is checked for simple use.
7508 *COMP_VECTYPE - the vector type for the comparison.
7510 Returns whether a COND can be vectorized. Checks whether
7511 condition operands are supportable using vect_is_simple_use. */
7514 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7517 enum vect_def_type dt
;
7518 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7521 if (TREE_CODE (cond
) == SSA_NAME
7522 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7524 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7525 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7528 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7533 if (!COMPARISON_CLASS_P (cond
))
7536 lhs
= TREE_OPERAND (cond
, 0);
7537 rhs
= TREE_OPERAND (cond
, 1);
7539 if (TREE_CODE (lhs
) == SSA_NAME
)
7541 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7542 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7545 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7546 && TREE_CODE (lhs
) != FIXED_CST
)
7549 if (TREE_CODE (rhs
) == SSA_NAME
)
7551 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7552 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7555 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7556 && TREE_CODE (rhs
) != FIXED_CST
)
7559 if (vectype1
&& vectype2
7560 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7563 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7567 /* vectorizable_condition.
7569 Check if STMT is a conditional modify expression that can be vectorized.
7570 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7571 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7574 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7575 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7576 else clause if it is 2).
7578 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7581 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7582 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7585 tree scalar_dest
= NULL_TREE
;
7586 tree vec_dest
= NULL_TREE
;
7587 tree cond_expr
, then_clause
, else_clause
;
7588 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7589 tree comp_vectype
= NULL_TREE
;
7590 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7591 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7594 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7595 enum vect_def_type dt
, dts
[4];
7597 enum tree_code code
;
7598 stmt_vec_info prev_stmt_info
= NULL
;
7600 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7601 vec
<tree
> vec_oprnds0
= vNULL
;
7602 vec
<tree
> vec_oprnds1
= vNULL
;
7603 vec
<tree
> vec_oprnds2
= vNULL
;
7604 vec
<tree
> vec_oprnds3
= vNULL
;
7606 bool masked
= false;
7608 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7611 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7613 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7616 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7617 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7621 /* FORNOW: not yet supported. */
7622 if (STMT_VINFO_LIVE_P (stmt_info
))
7624 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7626 "value used after loop.\n");
7631 /* Is vectorizable conditional operation? */
7632 if (!is_gimple_assign (stmt
))
7635 code
= gimple_assign_rhs_code (stmt
);
7637 if (code
!= COND_EXPR
)
7640 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7641 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7642 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7647 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7649 gcc_assert (ncopies
>= 1);
7650 if (reduc_index
&& ncopies
> 1)
7651 return false; /* FORNOW */
7653 cond_expr
= gimple_assign_rhs1 (stmt
);
7654 then_clause
= gimple_assign_rhs2 (stmt
);
7655 else_clause
= gimple_assign_rhs3 (stmt
);
7657 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7662 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7665 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7669 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7672 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7675 masked
= !COMPARISON_CLASS_P (cond_expr
);
7676 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7678 if (vec_cmp_type
== NULL_TREE
)
7683 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7684 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7691 vec_oprnds0
.create (1);
7692 vec_oprnds1
.create (1);
7693 vec_oprnds2
.create (1);
7694 vec_oprnds3
.create (1);
7698 scalar_dest
= gimple_assign_lhs (stmt
);
7699 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7701 /* Handle cond expr. */
7702 for (j
= 0; j
< ncopies
; j
++)
7704 gassign
*new_stmt
= NULL
;
7709 auto_vec
<tree
, 4> ops
;
7710 auto_vec
<vec
<tree
>, 4> vec_defs
;
7713 ops
.safe_push (cond_expr
);
7716 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7717 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7719 ops
.safe_push (then_clause
);
7720 ops
.safe_push (else_clause
);
7721 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7722 vec_oprnds3
= vec_defs
.pop ();
7723 vec_oprnds2
= vec_defs
.pop ();
7725 vec_oprnds1
= vec_defs
.pop ();
7726 vec_oprnds0
= vec_defs
.pop ();
7734 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7736 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7742 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7743 stmt
, comp_vectype
);
7744 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7745 loop_vinfo
, >emp
, &dts
[0]);
7748 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7749 stmt
, comp_vectype
);
7750 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7751 loop_vinfo
, >emp
, &dts
[1]);
7753 if (reduc_index
== 1)
7754 vec_then_clause
= reduc_def
;
7757 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7759 vect_is_simple_use (then_clause
, loop_vinfo
,
7762 if (reduc_index
== 2)
7763 vec_else_clause
= reduc_def
;
7766 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7768 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7775 = vect_get_vec_def_for_stmt_copy (dts
[0],
7776 vec_oprnds0
.pop ());
7779 = vect_get_vec_def_for_stmt_copy (dts
[1],
7780 vec_oprnds1
.pop ());
7782 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7783 vec_oprnds2
.pop ());
7784 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7785 vec_oprnds3
.pop ());
7790 vec_oprnds0
.quick_push (vec_cond_lhs
);
7792 vec_oprnds1
.quick_push (vec_cond_rhs
);
7793 vec_oprnds2
.quick_push (vec_then_clause
);
7794 vec_oprnds3
.quick_push (vec_else_clause
);
7797 /* Arguments are ready. Create the new vector stmt. */
7798 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7800 vec_then_clause
= vec_oprnds2
[i
];
7801 vec_else_clause
= vec_oprnds3
[i
];
7804 vec_compare
= vec_cond_lhs
;
7807 vec_cond_rhs
= vec_oprnds1
[i
];
7808 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7809 vec_cond_lhs
, vec_cond_rhs
);
7811 new_temp
= make_ssa_name (vec_dest
);
7812 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
7813 vec_compare
, vec_then_clause
,
7815 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7817 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7824 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7826 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7828 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7831 vec_oprnds0
.release ();
7832 vec_oprnds1
.release ();
7833 vec_oprnds2
.release ();
7834 vec_oprnds3
.release ();
7839 /* vectorizable_comparison.
7841 Check if STMT is a comparison expression that can be vectorized.
7842 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7843 comparison, put it in VEC_STMT, and insert it at GSI.
7845 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7848 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7849 gimple
**vec_stmt
, tree reduc_def
,
7852 tree lhs
, rhs1
, rhs2
;
7853 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7854 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7855 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7856 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7858 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7859 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7862 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7863 stmt_vec_info prev_stmt_info
= NULL
;
7865 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7866 vec
<tree
> vec_oprnds0
= vNULL
;
7867 vec
<tree
> vec_oprnds1
= vNULL
;
7872 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7875 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
7878 mask_type
= vectype
;
7879 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7884 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7886 gcc_assert (ncopies
>= 1);
7887 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7888 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7892 if (STMT_VINFO_LIVE_P (stmt_info
))
7894 if (dump_enabled_p ())
7895 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7896 "value used after loop.\n");
7900 if (!is_gimple_assign (stmt
))
7903 code
= gimple_assign_rhs_code (stmt
);
7905 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7908 rhs1
= gimple_assign_rhs1 (stmt
);
7909 rhs2
= gimple_assign_rhs2 (stmt
);
7911 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7912 &dts
[0], &vectype1
))
7915 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7916 &dts
[1], &vectype2
))
7919 if (vectype1
&& vectype2
7920 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7923 vectype
= vectype1
? vectype1
: vectype2
;
7925 /* Invariant comparison. */
7928 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
7929 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
7932 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
7935 /* Can't compare mask and non-mask types. */
7936 if (vectype1
&& vectype2
7937 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
7940 /* Boolean values may have another representation in vectors
7941 and therefore we prefer bit operations over comparison for
7942 them (which also works for scalar masks). We store opcodes
7943 to use in bitop1 and bitop2. Statement is vectorized as
7944 BITOP2 (rhs1 BITOP1 rhs2) or
7945 rhs1 BITOP2 (BITOP1 rhs2)
7946 depending on bitop1 and bitop2 arity. */
7947 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
7949 if (code
== GT_EXPR
)
7951 bitop1
= BIT_NOT_EXPR
;
7952 bitop2
= BIT_AND_EXPR
;
7954 else if (code
== GE_EXPR
)
7956 bitop1
= BIT_NOT_EXPR
;
7957 bitop2
= BIT_IOR_EXPR
;
7959 else if (code
== LT_EXPR
)
7961 bitop1
= BIT_NOT_EXPR
;
7962 bitop2
= BIT_AND_EXPR
;
7963 std::swap (rhs1
, rhs2
);
7964 std::swap (dts
[0], dts
[1]);
7966 else if (code
== LE_EXPR
)
7968 bitop1
= BIT_NOT_EXPR
;
7969 bitop2
= BIT_IOR_EXPR
;
7970 std::swap (rhs1
, rhs2
);
7971 std::swap (dts
[0], dts
[1]);
7975 bitop1
= BIT_XOR_EXPR
;
7976 if (code
== EQ_EXPR
)
7977 bitop2
= BIT_NOT_EXPR
;
7983 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7984 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
7986 if (bitop1
== NOP_EXPR
)
7987 return expand_vec_cmp_expr_p (vectype
, mask_type
);
7990 machine_mode mode
= TYPE_MODE (vectype
);
7993 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
7994 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
7997 if (bitop2
!= NOP_EXPR
)
7999 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8000 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8010 vec_oprnds0
.create (1);
8011 vec_oprnds1
.create (1);
8015 lhs
= gimple_assign_lhs (stmt
);
8016 mask
= vect_create_destination_var (lhs
, mask_type
);
8018 /* Handle cmp expr. */
8019 for (j
= 0; j
< ncopies
; j
++)
8021 gassign
*new_stmt
= NULL
;
8026 auto_vec
<tree
, 2> ops
;
8027 auto_vec
<vec
<tree
>, 2> vec_defs
;
8029 ops
.safe_push (rhs1
);
8030 ops
.safe_push (rhs2
);
8031 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
8032 vec_oprnds1
= vec_defs
.pop ();
8033 vec_oprnds0
= vec_defs
.pop ();
8037 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8038 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8043 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8044 vec_oprnds0
.pop ());
8045 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8046 vec_oprnds1
.pop ());
8051 vec_oprnds0
.quick_push (vec_rhs1
);
8052 vec_oprnds1
.quick_push (vec_rhs2
);
8055 /* Arguments are ready. Create the new vector stmt. */
8056 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8058 vec_rhs2
= vec_oprnds1
[i
];
8060 new_temp
= make_ssa_name (mask
);
8061 if (bitop1
== NOP_EXPR
)
8063 new_stmt
= gimple_build_assign (new_temp
, code
,
8064 vec_rhs1
, vec_rhs2
);
8065 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8069 if (bitop1
== BIT_NOT_EXPR
)
8070 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8072 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8074 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8075 if (bitop2
!= NOP_EXPR
)
8077 tree res
= make_ssa_name (mask
);
8078 if (bitop2
== BIT_NOT_EXPR
)
8079 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8081 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8083 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8087 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8094 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8096 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8098 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8101 vec_oprnds0
.release ();
8102 vec_oprnds1
.release ();
8107 /* Make sure the statement is vectorizable. */
8110 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
8112 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8113 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8114 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8116 tree scalar_type
, vectype
;
8117 gimple
*pattern_stmt
;
8118 gimple_seq pattern_def_seq
;
8120 if (dump_enabled_p ())
8122 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8123 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8126 if (gimple_has_volatile_ops (stmt
))
8128 if (dump_enabled_p ())
8129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8130 "not vectorized: stmt has volatile operands\n");
8135 /* Skip stmts that do not need to be vectorized. In loops this is expected
8137 - the COND_EXPR which is the loop exit condition
8138 - any LABEL_EXPRs in the loop
8139 - computations that are used only for array indexing or loop control.
8140 In basic blocks we only analyze statements that are a part of some SLP
8141 instance, therefore, all the statements are relevant.
8143 Pattern statement needs to be analyzed instead of the original statement
8144 if the original statement is not relevant. Otherwise, we analyze both
8145 statements. In basic blocks we are called from some SLP instance
8146 traversal, don't analyze pattern stmts instead, the pattern stmts
8147 already will be part of SLP instance. */
8149 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8150 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8151 && !STMT_VINFO_LIVE_P (stmt_info
))
8153 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8155 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8156 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8158 /* Analyze PATTERN_STMT instead of the original stmt. */
8159 stmt
= pattern_stmt
;
8160 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8161 if (dump_enabled_p ())
8163 dump_printf_loc (MSG_NOTE
, vect_location
,
8164 "==> examining pattern statement: ");
8165 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8170 if (dump_enabled_p ())
8171 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8176 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8179 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8180 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8182 /* Analyze PATTERN_STMT too. */
8183 if (dump_enabled_p ())
8185 dump_printf_loc (MSG_NOTE
, vect_location
,
8186 "==> examining pattern statement: ");
8187 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8190 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
8194 if (is_pattern_stmt_p (stmt_info
)
8196 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8198 gimple_stmt_iterator si
;
8200 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8202 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8203 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8204 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8206 /* Analyze def stmt of STMT if it's a pattern stmt. */
8207 if (dump_enabled_p ())
8209 dump_printf_loc (MSG_NOTE
, vect_location
,
8210 "==> examining pattern def statement: ");
8211 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8214 if (!vect_analyze_stmt (pattern_def_stmt
,
8215 need_to_vectorize
, node
))
8221 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8223 case vect_internal_def
:
8226 case vect_reduction_def
:
8227 case vect_nested_cycle
:
8228 gcc_assert (!bb_vinfo
8229 && (relevance
== vect_used_in_outer
8230 || relevance
== vect_used_in_outer_by_reduction
8231 || relevance
== vect_used_by_reduction
8232 || relevance
== vect_unused_in_scope
8233 || relevance
== vect_used_only_live
));
8236 case vect_induction_def
:
8237 case vect_constant_def
:
8238 case vect_external_def
:
8239 case vect_unknown_def_type
:
8246 gcc_assert (PURE_SLP_STMT (stmt_info
));
8248 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8249 if (dump_enabled_p ())
8251 dump_printf_loc (MSG_NOTE
, vect_location
,
8252 "get vectype for scalar type: ");
8253 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8254 dump_printf (MSG_NOTE
, "\n");
8257 vectype
= get_vectype_for_scalar_type (scalar_type
);
8260 if (dump_enabled_p ())
8262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8263 "not SLPed: unsupported data-type ");
8264 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8266 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8271 if (dump_enabled_p ())
8273 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8274 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8275 dump_printf (MSG_NOTE
, "\n");
8278 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8281 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8283 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8284 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8285 || (is_gimple_call (stmt
)
8286 && gimple_call_lhs (stmt
) == NULL_TREE
));
8287 *need_to_vectorize
= true;
8290 if (PURE_SLP_STMT (stmt_info
) && !node
)
8292 dump_printf_loc (MSG_NOTE
, vect_location
,
8293 "handled only by SLP analysis\n");
8299 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8300 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8301 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8302 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8303 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8304 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8305 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8306 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8307 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8308 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8309 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8310 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8311 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8315 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8316 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8317 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8318 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8319 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8320 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8321 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8322 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8323 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8324 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8329 if (dump_enabled_p ())
8331 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8332 "not vectorized: relevant stmt not ");
8333 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8334 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8343 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8344 need extra handling, except for vectorizable reductions. */
8345 if (STMT_VINFO_LIVE_P (stmt_info
)
8346 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8347 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
, -1, NULL
);
8351 if (dump_enabled_p ())
8353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8354 "not vectorized: live stmt not ");
8355 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8356 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8366 /* Function vect_transform_stmt.
8368 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8371 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8372 bool *grouped_store
, slp_tree slp_node
,
8373 slp_instance slp_node_instance
)
8375 bool is_store
= false;
8376 gimple
*vec_stmt
= NULL
;
8377 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8380 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8381 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8383 switch (STMT_VINFO_TYPE (stmt_info
))
8385 case type_demotion_vec_info_type
:
8386 case type_promotion_vec_info_type
:
8387 case type_conversion_vec_info_type
:
8388 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8392 case induc_vec_info_type
:
8393 gcc_assert (!slp_node
);
8394 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8398 case shift_vec_info_type
:
8399 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8403 case op_vec_info_type
:
8404 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8408 case assignment_vec_info_type
:
8409 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8413 case load_vec_info_type
:
8414 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8419 case store_vec_info_type
:
8420 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8422 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8424 /* In case of interleaving, the whole chain is vectorized when the
8425 last store in the chain is reached. Store stmts before the last
8426 one are skipped, and there vec_stmt_info shouldn't be freed
8428 *grouped_store
= true;
8429 if (STMT_VINFO_VEC_STMT (stmt_info
))
8436 case condition_vec_info_type
:
8437 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8441 case comparison_vec_info_type
:
8442 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8446 case call_vec_info_type
:
8447 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8448 stmt
= gsi_stmt (*gsi
);
8449 if (is_gimple_call (stmt
)
8450 && gimple_call_internal_p (stmt
)
8451 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8455 case call_simd_clone_vec_info_type
:
8456 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8457 stmt
= gsi_stmt (*gsi
);
8460 case reduc_vec_info_type
:
8461 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8466 if (!STMT_VINFO_LIVE_P (stmt_info
))
8468 if (dump_enabled_p ())
8469 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8470 "stmt not supported.\n");
8475 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8476 This would break hybrid SLP vectorization. */
8478 gcc_assert (!vec_stmt
8479 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8481 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8482 is being vectorized, but outside the immediately enclosing loop. */
8484 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8485 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8486 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8487 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8488 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8489 || STMT_VINFO_RELEVANT (stmt_info
) ==
8490 vect_used_in_outer_by_reduction
))
8492 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8493 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8494 imm_use_iterator imm_iter
;
8495 use_operand_p use_p
;
8499 if (dump_enabled_p ())
8500 dump_printf_loc (MSG_NOTE
, vect_location
,
8501 "Record the vdef for outer-loop vectorization.\n");
8503 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8504 (to be used when vectorizing outer-loop stmts that use the DEF of
8506 if (gimple_code (stmt
) == GIMPLE_PHI
)
8507 scalar_dest
= PHI_RESULT (stmt
);
8509 scalar_dest
= gimple_assign_lhs (stmt
);
8511 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8513 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8515 exit_phi
= USE_STMT (use_p
);
8516 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8521 /* Handle stmts whose DEF is used outside the loop-nest that is
8522 being vectorized. */
8527 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8529 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8530 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8531 && STMT_VINFO_TYPE (slp_stmt_info
) != reduc_vec_info_type
)
8533 done
= vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8539 else if (STMT_VINFO_LIVE_P (stmt_info
)
8540 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8542 done
= vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, &vec_stmt
);
8547 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8553 /* Remove a group of stores (for SLP or interleaving), free their
/* Walk the chain of grouped stores starting at FIRST_STMT (via
   GROUP_NEXT_ELEMENT) and delete each statement: detach its virtual def,
   remove it from the IL, release its SSA defs and free its stmt_vec_info.
   NOTE(review): extraction-damaged fragment — the loop construct and
   braces are missing from the visible text; fused numbers (8557...) are
   original file line numbers, not code.  */
8557 vect_remove_stores (gimple
*first_stmt
)
8559 gimple
*next
= first_stmt
;
8561 gimple_stmt_iterator next_si
;
8565 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
/* Remember the chain successor before NEXT is freed below.  */
8567 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
/* For a pattern stmt, the statement actually present in the IL is the
   related original stmt — remove that one instead.  */
8568 if (is_pattern_stmt_p (stmt_info
))
8569 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8570 /* Free the attached stmt_vec_info and remove the stmt. */
8571 next_si
= gsi_for_stmt (next
);
8572 unlink_stmt_vdef (next
);
8573 gsi_remove (&next_si
, true);
8574 release_defs (next
);
8575 free_stmt_vec_info (next
);
8581 /* Function new_stmt_vec_info.
8583 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocates the record zero-filled with xcalloc, then explicitly sets
   every field to its neutral starting value.  NOTE(review):
   extraction-damaged fragment — braces and some lines are missing; the
   fused numbers (8586...) are original file line numbers.  */
8586 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8589 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
/* Core identity / classification fields.  */
8591 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8592 STMT_VINFO_STMT (res
) = stmt
;
8594 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8595 STMT_VINFO_LIVE_P (res
) = false;
8596 STMT_VINFO_VECTYPE (res
) = NULL
;
8597 STMT_VINFO_VEC_STMT (res
) = NULL
;
8598 STMT_VINFO_VECTORIZABLE (res
) = true;
8599 STMT_VINFO_IN_PATTERN_P (res
) = false;
8600 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8601 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8602 STMT_VINFO_DATA_REF (res
) = NULL
;
8603 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8604 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
/* Data-reference description fields start out empty.  */
8606 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8607 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8608 STMT_VINFO_DR_INIT (res
) = NULL
;
8609 STMT_VINFO_DR_STEP (res
) = NULL
;
8610 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
/* A PHI in a loop-header block may be an induction/reduction — its def
   type is not known yet; anything else defaults to an internal def.  */
8612 if (gimple_code (stmt
) == GIMPLE_PHI
8613 && is_loop_header_bb_p (gimple_bb (stmt
)))
8614 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8616 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8618 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8619 STMT_SLP_TYPE (res
) = loop_vect
;
8620 STMT_VINFO_NUM_SLP_USES (res
) = 0;
/* Interleaving-group bookkeeping starts empty.  */
8622 GROUP_FIRST_ELEMENT (res
) = NULL
;
8623 GROUP_NEXT_ELEMENT (res
) = NULL
;
8624 GROUP_SIZE (res
) = 0;
8625 GROUP_STORE_COUNT (res
) = 0;
8626 GROUP_GAP (res
) = 0;
8627 GROUP_SAME_DR_STMT (res
) = NULL
;
8633 /* Create a hash table for stmt_vec_info. */
/* Creates the global stmt_vec_info_vec with an initial capacity of 50;
   asserts it was not already created.  */
8636 init_stmt_vec_info_vec (void)
8638 gcc_assert (!stmt_vec_info_vec
.exists ());
8639 stmt_vec_info_vec
.create (50);
8643 /* Free hash table for stmt_vec_info. */
/* Frees every remaining stmt_vec_info recorded in the global vector,
   then releases the vector itself.  NOTE(review): loop braces are
   missing from the visible, extraction-damaged text.  */
8646 free_stmt_vec_info_vec (void)
8650 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8652 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8653 gcc_assert (stmt_vec_info_vec
.exists ());
8654 stmt_vec_info_vec
.release ();
8658 /* Free stmt vectorization related info. */
/* Releases all vectorization bookkeeping attached to STMT.  If STMT has
   a recognized pattern, the pattern stmt, its LHS SSA name, and every
   stmt in the pattern def sequence are freed recursively first.
   NOTE(review): extraction-damaged fragment; braces and several interior
   lines are absent from the visible text.  */
8661 free_stmt_vec_info (gimple
*stmt
)
8663 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8668 /* Check if this statement has a related "pattern stmt"
8669 (introduced by the vectorizer during the pattern recognition
8670 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8672 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8674 stmt_vec_info patt_info
8675 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8678 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8679 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
/* Detach the pattern stmt from its bb and release its SSA result so
   the name can be recycled.  */
8680 gimple_set_bb (patt_stmt
, NULL
);
8681 tree lhs
= gimple_get_lhs (patt_stmt
);
8682 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8683 release_ssa_name (lhs
);
/* Do the same for every stmt of the pattern def sequence.  */
8686 gimple_stmt_iterator si
;
8687 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8689 gimple
*seq_stmt
= gsi_stmt (si
);
8690 gimple_set_bb (seq_stmt
, NULL
);
8691 lhs
= gimple_get_lhs (seq_stmt
);
8692 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8693 release_ssa_name (lhs
);
8694 free_stmt_vec_info (seq_stmt
);
8697 free_stmt_vec_info (patt_stmt
);
/* Finally release STMT's own auxiliary vectors and clear the
   stmt -> stmt_vec_info mapping.  */
8701 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8702 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8703 set_vinfo_for_stmt (stmt
, NULL
);
8708 /* Function get_vectype_for_scalar_type_and_size.
8710 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
/* Builds a vector type whose element type is derived from SCALAR_TYPE and
   whose total size is SIZE bytes (or the target's preferred SIMD size —
   presumably when SIZE is 0; the guarding condition is missing from this
   extraction-damaged text — TODO confirm against upstream).  Only MODE_INT
   and MODE_FLOAT element modes are handled.  */
8714 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8716 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8717 machine_mode simd_mode
;
8718 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8725 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8726 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8729 /* For vector types of elements whose mode precision doesn't
8730 match their types precision we use a element type of mode
8731 precision. The vectorization routines will have to make sure
8732 they support the proper result truncation/extension.
8733 We also make sure to build vector types with INTEGER_TYPE
8734 component type only. */
8735 if (INTEGRAL_TYPE_P (scalar_type
)
8736 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8737 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8738 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8739 TYPE_UNSIGNED (scalar_type
));
8741 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8742 When the component mode passes the above test simply use a type
8743 corresponding to that mode. The theory is that any use that
8744 would cause problems with this will disable vectorization anyway. */
8745 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8746 && !INTEGRAL_TYPE_P (scalar_type
))
8747 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8749 /* We can't build a vector type of elements with alignment bigger than
8751 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8752 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8753 TYPE_UNSIGNED (scalar_type
));
8755 /* If we felt back to using the mode fail if there was
8756 no scalar type for it. */
8757 if (scalar_type
== NULL_TREE
)
8760 /* If no size was supplied use the mode the target prefers. Otherwise
8761 lookup a vector mode of the specified size. */
8763 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8765 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
/* Number of elements = bytes in the SIMD mode / bytes per element.  */
8766 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8770 vectype
= build_vector_type (scalar_type
, nunits
);
/* Reject the result if the built type got neither a vector nor an
   integral machine mode.  */
8772 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8773 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Vector size (in bytes) currently in use for this compilation; 0 means
   "not decided yet" and is latched from the first vector type built.  */
8779 unsigned int current_vector_size
;
8781 /* Function get_vectype_for_scalar_type.
8783 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Convenience wrapper: queries with the current vector size and, on the
   first success (visible condition fragment: current_vector_size == 0),
   records the chosen size in current_vector_size.  */
8787 get_vectype_for_scalar_type (tree scalar_type
)
8790 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8791 current_vector_size
);
8793 && current_vector_size
== 0)
8794 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8798 /* Function get_mask_type_for_scalar_type.
8800 Returns the mask type corresponding to a result of comparison
8801 of vectors of specified SCALAR_TYPE as supported by target. */
/* Builds a truth (boolean) vector with as many lanes as the vector type
   for SCALAR_TYPE, sized to the current vector size.  NOTE(review): the
   NULL-vectype early-return is missing from this damaged extraction.  */
8804 get_mask_type_for_scalar_type (tree scalar_type
)
8806 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
8811 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
8812 current_vector_size
);
8815 /* Function get_same_sized_vectype
8817 Returns a vector type corresponding to SCALAR_TYPE of size
8818 VECTOR_TYPE if supported by the target. */
/* Booleans map to a truth vector matching VECTOR_TYPE's size; all other
   scalar types go through the size-parameterized lookup using
   VECTOR_TYPE's byte size.  */
8821 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8823 if (TREE_CODE (scalar_type
) == BOOLEAN_TYPE
)
8824 return build_same_sized_truth_vector_type (vector_type
);
8826 return get_vectype_for_scalar_type_and_size
8827 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8830 /* Function vect_is_simple_use.
8833 VINFO - the vect info of the loop or basic block that is being vectorized.
8834 OPERAND - operand in the loop or bb.
8836 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8837 DT - the type of definition
8839 Returns whether a stmt with OPERAND can be vectorized.
8840 For loops, supportable operands are constants, loop invariants, and operands
8841 that are defined by the current iteration of the loop. Unsupportable
8842 operands are those that are defined by a previous iteration of the loop (as
8843 is the case in reduction/induction computations).
8844 For basic blocks, supportable operands are constants and bb invariants.
8845 For now, operands defined outside the basic block are not supported. */
/* NOTE(review): extraction-damaged fragment — braces, returns and some
   interior lines are absent; comments below describe the visible logic.  */
8848 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8849 gimple
**def_stmt
, enum vect_def_type
*dt
)
8852 *dt
= vect_unknown_def_type
;
8854 if (dump_enabled_p ())
8856 dump_printf_loc (MSG_NOTE
, vect_location
,
8857 "vect_is_simple_use: operand ");
8858 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8859 dump_printf (MSG_NOTE
, "\n");
/* Literal constants are always usable.  */
8862 if (CONSTANT_CLASS_P (operand
))
8864 *dt
= vect_constant_def
;
/* Other invariants (e.g. addresses) are external defs.  */
8868 if (is_gimple_min_invariant (operand
))
8870 *dt
= vect_external_def
;
/* Beyond constants/invariants only SSA names are supported.  */
8874 if (TREE_CODE (operand
) != SSA_NAME
)
8876 if (dump_enabled_p ())
8877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
/* Default defs (e.g. incoming parameters) are defined outside any
   vectorized region.  */
8882 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8884 *dt
= vect_external_def
;
8888 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8889 if (dump_enabled_p ())
8891 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8892 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
/* A def outside the loop/bb being vectorized is external; otherwise
   take the classification recorded in its stmt_vec_info.  */
8895 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
8896 *dt
= vect_external_def
;
8899 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8900 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
/* Dump the determined def type (switch body; braces/breaks missing in
   this damaged extraction).  */
8903 if (dump_enabled_p ())
8905 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8908 case vect_uninitialized_def
:
8909 dump_printf (MSG_NOTE
, "uninitialized\n");
8911 case vect_constant_def
:
8912 dump_printf (MSG_NOTE
, "constant\n");
8914 case vect_external_def
:
8915 dump_printf (MSG_NOTE
, "external\n");
8917 case vect_internal_def
:
8918 dump_printf (MSG_NOTE
, "internal\n");
8920 case vect_induction_def
:
8921 dump_printf (MSG_NOTE
, "induction\n");
8923 case vect_reduction_def
:
8924 dump_printf (MSG_NOTE
, "reduction\n");
8926 case vect_double_reduction_def
:
8927 dump_printf (MSG_NOTE
, "double reduction\n");
8929 case vect_nested_cycle
:
8930 dump_printf (MSG_NOTE
, "nested cycle\n");
8932 case vect_unknown_def_type
:
8933 dump_printf (MSG_NOTE
, "unknown\n");
/* An unclassifiable def means the use is not vectorizable.  */
8938 if (*dt
== vect_unknown_def_type
)
8940 if (dump_enabled_p ())
8941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8942 "Unsupported pattern.\n");
/* Finally restrict the kind of defining statement accepted (switch
   cases missing from the visible text).  */
8946 switch (gimple_code (*def_stmt
))
8953 if (dump_enabled_p ())
8954 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8955 "unsupported defining stmt:\n");
8962 /* Function vect_is_simple_use.
8964 Same as vect_is_simple_use but also determines the vector operand
8965 type of OPERAND and stores it to *VECTYPE. If the definition of
8966 OPERAND is vect_uninitialized_def, vect_constant_def or
8967 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8968 is responsible to compute the best suited vector type for the
/* Overload delegating to the 3-argument vect_is_simple_use above.
   NOTE(review): extraction-damaged fragment; braces/returns missing.  */
8972 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8973 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
8975 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
8978 /* Now get a vector type if the def is internal, otherwise supply
8979 NULL_TREE and leave it up to the caller to figure out a proper
8980 type for the use stmt. */
8981 if (*dt
== vect_internal_def
8982 || *dt
== vect_induction_def
8983 || *dt
== vect_reduction_def
8984 || *dt
== vect_double_reduction_def
8985 || *dt
== vect_nested_cycle
)
8987 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
/* An irrelevant, non-live stmt replaced by a pattern: take the vectype
   from the pattern stmt instead.  */
8989 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8990 && !STMT_VINFO_RELEVANT (stmt_info
)
8991 && !STMT_VINFO_LIVE_P (stmt_info
))
8992 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8994 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8995 gcc_assert (*vectype
!= NULL_TREE
);
8997 else if (*dt
== vect_uninitialized_def
8998 || *dt
== vect_constant_def
8999 || *dt
== vect_external_def
)
9000 *vectype
= NULL_TREE
;
9008 /* Function supportable_widening_operation
9010 Check whether an operation represented by the code CODE is a
9011 widening operation that is supported by the target platform in
9012 vector form (i.e., when operating on arguments of type VECTYPE_IN
9013 producing a result of type VECTYPE_OUT).
9015 Widening operations we currently support are NOP (CONVERT), FLOAT
9016 and WIDEN_MULT. This function checks if these operations are supported
9017 by the target platform either directly (via vector tree-codes), or via
9021 - CODE1 and CODE2 are codes of vector operations to be used when
9022 vectorizing the operation, if available.
9023 - MULTI_STEP_CVT determines the number of required intermediate steps in
9024 case of multi-step conversion (like char->short->int - in that case
9025 MULTI_STEP_CVT will be 1).
9026 - INTERM_TYPES contains the intermediate type required to perform the
9027 widening operation (short in the above example). */
9030 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9031 tree vectype_out
, tree vectype_in
,
9032 enum tree_code
*code1
, enum tree_code
*code2
,
9033 int *multi_step_cvt
,
9034 vec
<tree
> *interm_types
)
9036 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9037 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9038 struct loop
*vect_loop
= NULL
;
9039 machine_mode vec_mode
;
9040 enum insn_code icode1
, icode2
;
9041 optab optab1
, optab2
;
9042 tree vectype
= vectype_in
;
9043 tree wide_vectype
= vectype_out
;
9044 enum tree_code c1
, c2
;
9046 tree prev_type
, intermediate_type
;
9047 machine_mode intermediate_mode
, prev_mode
;
9048 optab optab3
, optab4
;
9050 *multi_step_cvt
= 0;
9052 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9056 case WIDEN_MULT_EXPR
:
9057 /* The result of a vectorized widening operation usually requires
9058 two vectors (because the widened results do not fit into one vector).
9059 The generated vector results would normally be expected to be
9060 generated in the same order as in the original scalar computation,
9061 i.e. if 8 results are generated in each vector iteration, they are
9062 to be organized as follows:
9063 vect1: [res1,res2,res3,res4],
9064 vect2: [res5,res6,res7,res8].
9066 However, in the special case that the result of the widening
9067 operation is used in a reduction computation only, the order doesn't
9068 matter (because when vectorizing a reduction we change the order of
9069 the computation). Some targets can take advantage of this and
9070 generate more efficient code. For example, targets like Altivec,
9071 that support widen_mult using a sequence of {mult_even,mult_odd}
9072 generate the following vectors:
9073 vect1: [res1,res3,res5,res7],
9074 vect2: [res2,res4,res6,res8].
9076 When vectorizing outer-loops, we execute the inner-loop sequentially
9077 (each vectorized inner-loop iteration contributes to VF outer-loop
9078 iterations in parallel). We therefore don't allow to change the
9079 order of the computation in the inner-loop during outer-loop
9081 /* TODO: Another case in which order doesn't *really* matter is when we
9082 widen and then contract again, e.g. (short)((int)x * y >> 8).
9083 Normally, pack_trunc performs an even/odd permute, whereas the
9084 repack from an even/odd expansion would be an interleave, which
9085 would be significantly simpler for e.g. AVX2. */
9086 /* In any case, in order to avoid duplicating the code below, recurse
9087 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9088 are properly set up for the caller. If we fail, we'll continue with
9089 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9091 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9092 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9093 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9094 stmt
, vectype_out
, vectype_in
,
9095 code1
, code2
, multi_step_cvt
,
9098 /* Elements in a vector with vect_used_by_reduction property cannot
9099 be reordered if the use chain with this property does not have the
9100 same operation. One such an example is s += a * b, where elements
9101 in a and b cannot be reordered. Here we check if the vector defined
9102 by STMT is only directly used in the reduction statement. */
9103 tree lhs
= gimple_assign_lhs (stmt
);
9104 use_operand_p dummy
;
9106 stmt_vec_info use_stmt_info
= NULL
;
9107 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9108 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9109 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
9112 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9113 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9126 case VEC_WIDEN_MULT_EVEN_EXPR
:
9127 /* Support the recursion induced just above. */
9128 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9129 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9132 case WIDEN_LSHIFT_EXPR
:
9133 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9134 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
9138 c1
= VEC_UNPACK_LO_EXPR
;
9139 c2
= VEC_UNPACK_HI_EXPR
;
9143 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9144 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9147 case FIX_TRUNC_EXPR
:
9148 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9149 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9150 computing the operation. */
9157 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9160 if (code
== FIX_TRUNC_EXPR
)
9162 /* The signedness is determined from output operand. */
9163 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9164 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9168 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9169 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9172 if (!optab1
|| !optab2
)
9175 vec_mode
= TYPE_MODE (vectype
);
9176 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9177 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
9183 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9184 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9185 /* For scalar masks we may have different boolean
9186 vector types having the same QImode. Thus we
9187 add additional check for elements number. */
9188 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9189 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9190 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9192 /* Check if it's a multi-step conversion that can be done using intermediate
9195 prev_type
= vectype
;
9196 prev_mode
= vec_mode
;
9198 if (!CONVERT_EXPR_CODE_P (code
))
9201 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9202 intermediate steps in promotion sequence. We try
9203 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
9205 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9206 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9208 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9209 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9212 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9213 current_vector_size
);
9214 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9219 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9220 TYPE_UNSIGNED (prev_type
));
9222 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9223 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
9225 if (!optab3
|| !optab4
9226 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9227 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9228 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9229 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9230 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9231 == CODE_FOR_nothing
)
9232 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9233 == CODE_FOR_nothing
))
9236 interm_types
->quick_push (intermediate_type
);
9237 (*multi_step_cvt
)++;
9239 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9240 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9241 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9242 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9243 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9245 prev_type
= intermediate_type
;
9246 prev_mode
= intermediate_mode
;
9249 interm_types
->release ();
9254 /* Function supportable_narrowing_operation
9256 Check whether an operation represented by the code CODE is a
9257 narrowing operation that is supported by the target platform in
9258 vector form (i.e., when operating on arguments of type VECTYPE_IN
9259 and producing a result of type VECTYPE_OUT).
9261 Narrowing operations we currently support are NOP (CONVERT) and
9262 FIX_TRUNC. This function checks if these operations are supported by
9263 the target platform directly via vector tree-codes.
9266 - CODE1 is the code of a vector operation to be used when
9267 vectorizing the operation, if available.
9268 - MULTI_STEP_CVT determines the number of required intermediate steps in
9269 case of multi-step conversion (like int->short->char - in that case
9270 MULTI_STEP_CVT will be 1).
9271 - INTERM_TYPES contains the intermediate type required to perform the
9272 narrowing operation (short in the above example). */
9275 supportable_narrowing_operation (enum tree_code code
,
9276 tree vectype_out
, tree vectype_in
,
9277 enum tree_code
*code1
, int *multi_step_cvt
,
9278 vec
<tree
> *interm_types
)
9280 machine_mode vec_mode
;
9281 enum insn_code icode1
;
9282 optab optab1
, interm_optab
;
9283 tree vectype
= vectype_in
;
9284 tree narrow_vectype
= vectype_out
;
9286 tree intermediate_type
, prev_type
;
9287 machine_mode intermediate_mode
, prev_mode
;
9291 *multi_step_cvt
= 0;
9295 c1
= VEC_PACK_TRUNC_EXPR
;
9298 case FIX_TRUNC_EXPR
:
9299 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9303 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9304 tree code and optabs used for computing the operation. */
9311 if (code
== FIX_TRUNC_EXPR
)
9312 /* The signedness is determined from output operand. */
9313 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9315 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9320 vec_mode
= TYPE_MODE (vectype
);
9321 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9326 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9327 /* For scalar masks we may have different boolean
9328 vector types having the same QImode. Thus we
9329 add additional check for elements number. */
9330 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9331 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9332 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9334 /* Check if it's a multi-step conversion that can be done using intermediate
9336 prev_mode
= vec_mode
;
9337 prev_type
= vectype
;
9338 if (code
== FIX_TRUNC_EXPR
)
9339 uns
= TYPE_UNSIGNED (vectype_out
);
9341 uns
= TYPE_UNSIGNED (vectype
);
9343 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9344 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9345 costly than signed. */
9346 if (code
== FIX_TRUNC_EXPR
&& uns
)
9348 enum insn_code icode2
;
9351 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9353 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9354 if (interm_optab
!= unknown_optab
9355 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9356 && insn_data
[icode1
].operand
[0].mode
9357 == insn_data
[icode2
].operand
[0].mode
)
9360 optab1
= interm_optab
;
9365 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9366 intermediate steps in promotion sequence. We try
9367 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9368 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9369 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9371 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9372 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9375 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9376 current_vector_size
);
9377 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9382 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9384 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9387 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9388 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9389 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9390 == CODE_FOR_nothing
))
9393 interm_types
->quick_push (intermediate_type
);
9394 (*multi_step_cvt
)++;
9396 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9397 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9398 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9399 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9401 prev_mode
= intermediate_mode
;
9402 prev_type
= intermediate_type
;
9403 optab1
= interm_optab
;
9406 interm_types
->release ();