/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}
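
/* As a rough usage sketch (hypothetical caller, not taken verbatim from any
   call site): a statement's body cost can either be queued for later
   processing or handed straight to the target model, e.g.

     stmt_vector_for_cost body_costs;
     body_costs.create (0);
     unsigned estimate
       = record_stmt_cost (&body_costs, 1, vector_stmt, stmt_info,
			   0, vect_body);

   With a non-NULL vector the entry is pushed for later processing and a
   preliminary estimate is returned; with a NULL vector the cost goes
   directly to the target via add_stmt_cost.  */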
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
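
/* As a rough illustration (hypothetical loop, not from the sources): in

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + x;     <-- alters memory, hence relevant
	 last = b[i];         <-- only def is used after the loop, hence live
       }

   the store is marked vect_used_in_scope because it has a vdef, and the
   statement feeding LAST is detected as live through its use in a
   loop-closed exit phi.  */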
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- STMT = (S1) = var
     -2- STMT = (S1) = &array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
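
/* For example (hypothetical statement, not from the sources), in
   "x_1 = a[i_2]" the use of i_2 only indexes the array and is folded into
   the data reference, so by itself it does not make the statement defining
   i_2 relevant; a use of x_1, in contrast, would.  */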
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant

   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
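
/* A rough sketch of cases 3a/3b (hypothetical nest, not from the sources):

     outer-loop:
       d = ...                 <-- outer-loop def
       inner-loop:
	 t = ... d ...         <-- case 3a: inner-loop use of D
       ... = ... t ...         <-- case 3b: outer-loop use of T

   The relevance propagated to the defining statement is adjusted above
   according to which loop of the nest the use appears in.  */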
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");
	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");
	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, relevant, &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
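
/* For example (hypothetical numbers): a statement with one external operand
   vectorized with NCOPIES == 2 records one vector_stmt prologue cost for
   materializing the invariant operand and two vector_stmt body costs for the
   copies emitted inside the loop.  */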
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
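
/* For example (hypothetical numbers): interleaving a group of
   GROUP_SIZE == 4 stores with NCOPIES == 1 adds
   ncopies * ceil_log2 (group_size) * group_size = 1 * 2 * 4 = 8 vec_perm
   operations to the loop body, on top of the stores themselves.  */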
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with initial
     value 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
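
/* For instance (hypothetical case): initializing a 4-element integer vector
   type from the scalar constant 3 builds the vector constant { 3, 3, 3, 3 },
   assigns it to a new "cst_" SSA name in the loop preheader (or at GSI), and
   returns that name.  */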
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
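
/* E.g. for a (hypothetical) scalar statement "x = a[i] + c" with loop
   invariant C, the def for the load comes from STMT_VINFO_VEC_STMT of its
   defining statement, while C is broadcast into a new vector def by
   vect_init_vector in the preheader.  */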
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
			VS1.1:  vx.1 = memref1      VS1.2
			VS1.2:  vx.2 = memref2      VS1.3
			VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
			VSnew.1:  vz1 = vx.1 + ...  VSnew.2
			VSnew.2:  vz2 = vx.2 + ...  VSnew.3
			VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);
1697 /* Function vectorizable_mask_load_store.
1699 Check if STMT performs a conditional load or store that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1705 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1706 gimple
**vec_stmt
, slp_tree slp_node
)
1708 tree vec_dest
= NULL
;
1709 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1710 stmt_vec_info prev_stmt_info
;
1711 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1712 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1713 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1714 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1715 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1716 tree rhs_vectype
= NULL_TREE
;
1721 tree dataref_ptr
= NULL_TREE
;
1723 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1727 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1728 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1729 int gather_scale
= 1;
1730 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1734 enum vect_def_type dt
;
1736 if (slp_node
!= NULL
)
1739 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1740 gcc_assert (ncopies
>= 1);
1742 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1743 mask
= gimple_call_arg (stmt
, 2);
1745 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
1748 /* FORNOW. This restriction should be relaxed. */
1749 if (nested_in_vect_loop
&& ncopies
> 1)
1751 if (dump_enabled_p ())
1752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1753 "multiple types in nested loop.");
1757 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1760 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
1764 if (!STMT_VINFO_DATA_REF (stmt_info
))
1767 elem_type
= TREE_TYPE (vectype
);
1769 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1772 if (STMT_VINFO_STRIDED_P (stmt_info
))
1775 if (TREE_CODE (mask
) != SSA_NAME
)
1778 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
1782 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
1784 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
1785 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
1790 tree rhs
= gimple_call_arg (stmt
, 3);
1791 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
1795 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1798 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
1799 &gather_off
, &gather_scale
);
1800 gcc_assert (gather_decl
);
1801 if (!vect_is_simple_use (gather_off
, loop_vinfo
, &def_stmt
, &gather_dt
,
1802 &gather_off_vectype
))
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1806 "gather index use not simple.");
1810 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1812 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1813 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1815 if (dump_enabled_p ())
1816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1817 "masked gather with integer mask not supported.");
1821 else if (tree_int_cst_compare (nested_in_vect_loop
1822 ? STMT_VINFO_DR_STEP (stmt_info
)
1823 : DR_STEP (dr
), size_zero_node
) <= 0)
1825 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1826 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
1827 TYPE_MODE (mask_vectype
),
1830 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
1833 if (!vec_stmt
) /* transformation not required. */
1835 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1837 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1840 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1846 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1848 tree vec_oprnd0
= NULL_TREE
, op
;
1849 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1850 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1851 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1852 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1853 tree mask_perm_mask
= NULL_TREE
;
1854 edge pe
= loop_preheader_edge (loop
);
1857 enum { NARROW
, NONE
, WIDEN
} modifier
;
1858 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1860 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1861 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1862 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1863 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1864 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1865 scaletype
= TREE_VALUE (arglist
);
1866 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1867 && types_compatible_p (srctype
, masktype
));
1869 if (nunits
== gather_off_nunits
)
1871 else if (nunits
== gather_off_nunits
/ 2)
1873 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1876 for (i
= 0; i
< gather_off_nunits
; ++i
)
1877 sel
[i
] = i
| nunits
;
1879 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1881 else if (nunits
== gather_off_nunits
* 2)
1883 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1886 for (i
= 0; i
< nunits
; ++i
)
1887 sel
[i
] = i
< gather_off_nunits
1888 ? i
: i
+ nunits
- gather_off_nunits
;
1890 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1892 for (i
= 0; i
< nunits
; ++i
)
1893 sel
[i
] = i
| gather_off_nunits
;
1894 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
                  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
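      /* Assumed builtin shape (illustration only): the five-operand call
         built above targets a gather builtin of the form
           ret = GATHER (merge, base, index, mask, scale)
         with the mask vector doubling as the merge operand, which is the
         operand order used by typical masked-gather builtins.  */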
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                          ptr, vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}
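/* For orientation only (hypothetical GIMPLE, names made up): a scalar
   conditional load that reached this function as
     _3 = MASK_LOAD (&a[i_1], 4B, mask_2);
   is replaced by NCOPIES internal calls of the form
     vect__3.7_9 = MASK_LOAD (vectp_a.5_7, 4B, vect_mask_8);
   while the original scalar MASK_LOAD is rewritten to a harmless zero
   assignment so that even -fno-tree-dce cannot keep it alive.  */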
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
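/* Worked example (hypothetical types, not tied to a particular target):
   narrowing V4SI to V8HI can be done with a single VEC_PACK_TRUNC_EXPR
   that packs two V4SI inputs into one V8HI result, so *CONVERT_CODE is set
   accordingly and true is returned; narrowing int to char would need an
   intermediate step, MULTI_STEP_CVT becomes nonzero, and the function
   returns false.  */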
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }
  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  internal_fn ifn = IFN_LAST;
  combined_fn cfn = gimple_call_combined_fn (stmt);
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
        fndecl = targetm.vectorize.builtin_vectorized_function
          (cfn, vectype_out, vectype_in);
      else
        fndecl = targetm.vectorize.builtin_md_vectorized_function
          (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                         "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
                       vec_promote_demote, stmt_info, 0, vect_body);

      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  if (modifier == NARROW)
                    {
                      tree half_res = make_ssa_name (vectype_in);
                      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (new_stmt, half_res);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, convert_code,
                                                      prev_res, half_res);
                    }
                  else
                    {
                      if (ifn != IFN_LAST)
                        new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      else
                        new_stmt = gimple_build_call_vec (fndecl, vargs);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
          else if (modifier == NARROW)
            {
              tree half_res = make_ssa_name (vectype_in);
              new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (new_stmt, half_res);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, convert_code,
                                              prev_res, half_res);
            }
          else
            {
              if (ifn != IFN_LAST)
                new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              else
                new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else if (modifier == NARROW)
    {
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  if (ifn != IFN_LAST)
                    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                  else
                    new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (is_gimple_call (def_stmt)
               && gimple_call_internal_p (def_stmt)
               && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}
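/* As an illustration (hypothetical GIMPLE, names made up): for
     _1 = GOMP_SIMD_LANE (simduid.0_8);
     _2 = _1 * 4;
     op_3 = &array p+ _2;
   the walk above would record BASE = &array and LINEAR_STEP = 4 and mark
   the argument as linear within the simd lane.  */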
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;
  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
          thisarginfo.simd_lane_linear
            = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
               == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
          && !thisarginfo.linear_step
          && !vec_stmt
          && thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && !slp_node
          && TREE_CODE (op) == SSA_NAME)
        vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                      (n->simdclone->args[i].orig_type,
                       TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }

  if (bestn == NULL)
    return false;
  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if ((bestn->simdclone->args[i].arg_type
             == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
            || (bestn->simdclone->args[i].arg_type
                == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
            tree sll = arginfo[i].simd_lane_linear
                       ? boolean_true_node : boolean_false_node;
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  bitsize_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  if (arginfo[i].simd_lane_linear)
                    {
                      vargs.safe_push (arginfo[i].op);
                      break;
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }
      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                size_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber), gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
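/* Example of the generic path (assumed tree codes, for illustration):
   when a V8HI multiplication is widened, this helper is typically called
   twice, once with CODE == VEC_WIDEN_MULT_LO_EXPR and once with the HI
   variant (or the EVEN/ODD variants on some targets), so that the two
   emitted statements together cover all lanes of the input vectors.  */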
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple *stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
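/* Sketch of one demotion level (hypothetical types): with four V4SI
   operands in VEC_OPRNDS and CODE == VEC_PACK_TRUNC_EXPR, the loop above
   emits two V8HI statements, each packing a pair of V4SI vectors; a
   further recursive call would then combine those into a single V16QI
   vector for an int to char conversion.  */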
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple *stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
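/* Sketch (hypothetical types): for a V16QI to V8HI promotion each input
   vector VOP0 yields two half-width results, produced by the LO/HI (or
   EVEN/ODD) variants of the unpacking code, so VEC_TMP ends up with twice
   as many entries as VEC_OPRNDS0, which is what the final swap hands back
   to the caller.  */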
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
                         gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
           && (TYPE_PRECISION (lhs_type)
               != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
          || (INTEGRAL_TYPE_P (rhs_type)
              && (TYPE_PRECISION (rhs_type)
                  != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported."
                         "\n");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "can't convert between boolean and non "
                           "boolean vectors ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.\n", ncopies);
  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
          2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                                         slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, codecvt1,
                                                      vop0);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e., we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
4139 for (j
= 0; j
< ncopies
; j
++)
4143 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4147 vec_oprnds0
.truncate (0);
4148 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4149 vect_pow2 (multi_step_cvt
) - 1);
4152 /* Arguments are ready. Create the new vector stmts. */
4154 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4156 if (codecvt1
== CALL_EXPR
)
4158 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4159 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4160 gimple_call_set_lhs (new_stmt
, new_temp
);
4164 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4165 new_temp
= make_ssa_name (vec_dest
);
4166 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4170 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4171 vec_oprnds0
[i
] = new_temp
;
4174 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4175 stmt
, vec_dsts
, gsi
,
4180 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4184 vec_oprnds0
.release ();
4185 vec_oprnds1
.release ();
4186 vec_dsts
.release ();
4187 interm_types
.release ();
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
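/* A hypothetical illustration (not part of the original sources): a scalar
   copy such as

     _5 = _4;          or          _5 = (unsigned int) _4;

   becomes, per copy, a single vector assignment; conversions that keep the
   bit-pattern are expressed through a VIEW_CONVERT_EXPR on the operand:

     vect__5.7 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__4.6);  */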
4201 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4202 gimple
**vec_stmt
, slp_tree slp_node
)
4207 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4211 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4214 vec
<tree
> vec_oprnds
= vNULL
;
4216 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4217 vec_info
*vinfo
= stmt_info
->vinfo
;
4218 gimple
*new_stmt
= NULL
;
4219 stmt_vec_info prev_stmt_info
= NULL
;
4220 enum tree_code code
;
4223 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4226 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4230 /* Is vectorizable assignment? */
4231 if (!is_gimple_assign (stmt
))
4234 scalar_dest
= gimple_assign_lhs (stmt
);
4235 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4238 code
= gimple_assign_rhs_code (stmt
);
4239 if (gimple_assign_single_p (stmt
)
4240 || code
== PAREN_EXPR
4241 || CONVERT_EXPR_CODE_P (code
))
4242 op
= gimple_assign_rhs1 (stmt
);
4246 if (code
== VIEW_CONVERT_EXPR
)
4247 op
= TREE_OPERAND (op
, 0);
4249 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4250 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4258 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4260 gcc_assert (ncopies
>= 1);
4262 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4264 if (dump_enabled_p ())
4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4266 "use not simple.\n");
4270 /* We can handle NOP_EXPR conversions that do not change the number
4271 of elements or the vector size. */
4272 if ((CONVERT_EXPR_CODE_P (code
)
4273 || code
== VIEW_CONVERT_EXPR
)
4275 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4276 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4277 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4280 /* We do not handle bit-precision changes. */
4281 if ((CONVERT_EXPR_CODE_P (code
)
4282 || code
== VIEW_CONVERT_EXPR
)
4283 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4284 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4285 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4286 || ((TYPE_PRECISION (TREE_TYPE (op
))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4288 /* But a conversion that does not change the bit-pattern is ok. */
4289 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4290 > TYPE_PRECISION (TREE_TYPE (op
)))
4291 && TYPE_UNSIGNED (TREE_TYPE (op
)))
      /* Conversion between boolean types of different sizes is
         a simple assignment in case their vectypes are same
         booleans.  */
4295 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4296 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4300 "type conversion to/from bit-precision "
4305 if (!vec_stmt
) /* transformation not required. */
4307 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE
, vect_location
,
4310 "=== vectorizable_assignment ===\n");
4311 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4320 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4323 for (j
= 0; j
< ncopies
; j
++)
4327 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4329 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
      /* Arguments are ready.  Create the new vector stmt.  */
4332 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4334 if (CONVERT_EXPR_CODE_P (code
)
4335 || code
== VIEW_CONVERT_EXPR
)
4336 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4337 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4338 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4339 gimple_assign_set_lhs (new_stmt
, new_temp
);
4340 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4342 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4349 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4353 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4356 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
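/* A hedged usage sketch (the caller shown here is hypothetical): a pattern
   recognizer can ask whether a shift of a narrower type is available before
   committing to a rewrite, e.g.

     if (!vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       return NULL;

   i.e. the answer depends only on the tree code and the scalar type, not on
   a particular statement.  */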
4365 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4368 machine_mode vec_mode
;
4373 vectype
= get_vectype_for_scalar_type (scalar_type
);
4377 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4379 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4381 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4383 || (optab_handler (optab
, TYPE_MODE (vectype
))
4384 == CODE_FOR_nothing
))
4388 vec_mode
= TYPE_MODE (vectype
);
4389 icode
= (int) optab_handler (optab
, vec_mode
);
4390 if (icode
== CODE_FOR_nothing
)
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
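/* A hypothetical illustration (not from the original sources): for a loop

     for (i = 0; i < n; i++)
       a[i] = b[i] << k;

   the shift amount K is loop-invariant, so the vector/scalar form of the
   optab can be used and K stays scalar; if instead the source were
   a[i] = b[i] << c[i], the shift amounts form a vector and the
   vector/vector optab is required.  */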
4405 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4406 gimple
**vec_stmt
, slp_tree slp_node
)
4410 tree op0
, op1
= NULL
;
4411 tree vec_oprnd1
= NULL_TREE
;
4412 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4414 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4415 enum tree_code code
;
4416 machine_mode vec_mode
;
4420 machine_mode optab_op2_mode
;
4422 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4423 gimple
*new_stmt
= NULL
;
4424 stmt_vec_info prev_stmt_info
;
4431 vec
<tree
> vec_oprnds0
= vNULL
;
4432 vec
<tree
> vec_oprnds1
= vNULL
;
4435 bool scalar_shift_arg
= true;
4436 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4437 vec_info
*vinfo
= stmt_info
->vinfo
;
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4443 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4447 /* Is STMT a vectorizable binary/unary operation? */
4448 if (!is_gimple_assign (stmt
))
4451 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4454 code
= gimple_assign_rhs_code (stmt
);
4456 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4457 || code
== RROTATE_EXPR
))
4460 scalar_dest
= gimple_assign_lhs (stmt
);
4461 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4462 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4467 "bit-precision shifts not supported.\n");
4471 op0
= gimple_assign_rhs1 (stmt
);
4472 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4474 if (dump_enabled_p ())
4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4476 "use not simple.\n");
4479 /* If op0 is an external or constant def use a vector type with
4480 the same size as the output vector type. */
4482 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4484 gcc_assert (vectype
);
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4489 "no vectype for scalar type\n");
4493 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4494 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4495 if (nunits_out
!= nunits_in
)
4498 op1
= gimple_assign_rhs2 (stmt
);
4499 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4503 "use not simple.\n");
4508 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4518 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4520 gcc_assert (ncopies
>= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
4525 if ((dt
[1] == vect_internal_def
4526 || dt
[1] == vect_induction_def
)
4528 scalar_shift_arg
= false;
4529 else if (dt
[1] == vect_constant_def
4530 || dt
[1] == vect_external_def
4531 || dt
[1] == vect_internal_def
)
4533 /* In SLP, need to check whether the shift count is the same,
4534 in loops if it is a constant or invariant, it is always
4538 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4541 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4542 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4543 scalar_shift_arg
= false;
          /* If the shift amount is computed by a pattern stmt we cannot
             use the scalar amount directly thus give up and use a vector
             shift.  */
4549 if (dt
[1] == vect_internal_def
)
4551 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
4552 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
4553 scalar_shift_arg
= false;
4558 if (dump_enabled_p ())
4559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4560 "operand mode requires invariant argument.\n");
4564 /* Vector shifted by vector. */
4565 if (!scalar_shift_arg
)
4567 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_NOTE
, vect_location
,
4570 "vector/vector shift/rotate found.\n");
4573 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4574 if (op1_vectype
== NULL_TREE
4575 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4579 "unusable type for last operand in"
4580 " vector/vector shift/rotate.\n");
      /* See if the machine has a vector shifted by scalar insn and if not
         then see if it has a vector shifted by vector insn.  */
4588 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4590 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE
, vect_location
,
4594 "vector/scalar shift/rotate found.\n");
4598 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4600 && (optab_handler (optab
, TYPE_MODE (vectype
))
4601 != CODE_FOR_nothing
))
4603 scalar_shift_arg
= false;
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_NOTE
, vect_location
,
4607 "vector/vector shift/rotate found.\n");
          /* Unlike the other binary operators, shifts/rotates have
             the rhs being int, instead of the same type as the lhs,
             so make sure the scalar is the right type if we are
             dealing with vectors of long long/long/short/char.  */
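          /* For example (a hypothetical case): with V2DI vectors a constant
             shift count is folded to the element type first, i.e. "x << 3"
             uses a long long 3 rather than an int 3, so the invariant can be
             broadcast into a vector of the same mode as the shifted
             operand.  */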
4613 if (dt
[1] == vect_constant_def
)
4614 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4615 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4619 && TYPE_MODE (TREE_TYPE (vectype
))
4620 != TYPE_MODE (TREE_TYPE (op1
)))
4622 if (dump_enabled_p ())
4623 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4624 "unusable type for last operand in"
4625 " vector/vector shift/rotate.\n");
4628 if (vec_stmt
&& !slp_node
)
4630 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4631 op1
= vect_init_vector (stmt
, op1
,
4632 TREE_TYPE (vectype
), NULL
);
4639 /* Supportable by target? */
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4647 vec_mode
= TYPE_MODE (vectype
);
4648 icode
= (int) optab_handler (optab
, vec_mode
);
4649 if (icode
== CODE_FOR_nothing
)
4651 if (dump_enabled_p ())
4652 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4653 "op not supported by target.\n");
4654 /* Check only during analysis. */
4655 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4656 || (vf
< vect_min_worthwhile_factor (code
)
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE
, vect_location
,
4661 "proceeding using word mode.\n");
4664 /* Worthwhile without SIMD support? Check only during analysis. */
4665 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4666 && vf
< vect_min_worthwhile_factor (code
)
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4671 "not worthwhile without SIMD support.\n");
4675 if (!vec_stmt
) /* transformation not required. */
4677 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE
, vect_location
,
4680 "=== vectorizable_shift ===\n");
4681 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_NOTE
, vect_location
,
4689 "transform binary/unary operation.\n");
4692 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4694 prev_stmt_info
= NULL
;
4695 for (j
= 0; j
< ncopies
; j
++)
4700 if (scalar_shift_arg
)
          /* Vector shl and shr insn patterns can be defined with scalar
             operand 2 (shift operand).  In this case, use constant or loop
             invariant op1 directly, without extending it to vector mode
             first.  */
4706 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4707 if (!VECTOR_MODE_P (optab_op2_mode
))
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_NOTE
, vect_location
,
4711 "operand 1 using scalar mode.\n");
4713 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4714 vec_oprnds1
.quick_push (vec_oprnd1
);
            /* Store vec_oprnd1 for every vector stmt to be created
               for SLP_NODE.  We check during the analysis that all
               the shift arguments are the same.
               TODO: Allow different constants for different vector
               stmts generated for an SLP instance.  */
4722 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4723 vec_oprnds1
.quick_push (vec_oprnd1
);
      /* vec_oprnd1 is available if operand 1 should be of a scalar-type
         (a special case for certain kind of vector shifts); otherwise,
         operand 1 should be of a vector type (the usual case).  */
4732 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4735 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4739 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4741 /* Arguments are ready. Create the new vector stmt. */
4742 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4744 vop1
= vec_oprnds1
[i
];
4745 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4746 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4747 gimple_assign_set_lhs (new_stmt
, new_temp
);
4748 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4750 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4757 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4760 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4763 vec_oprnds0
.release ();
4764 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
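/* A hypothetical illustration (not part of the original sources): a scalar
   statement

     z_7 = x_5 + y_6;

   with a 4-element vectype and VF == 4 becomes a single copy of

     vect_z.9 = vect_x.7 + vect_y.8;

   with VF == 8 two such copies are emitted and chained through
   STMT_VINFO_RELATED_STMT, as the longer comment further below describes
   in detail.  */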
4779 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4780 gimple
**vec_stmt
, slp_tree slp_node
)
4784 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4785 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4787 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4788 enum tree_code code
;
4789 machine_mode vec_mode
;
4793 bool target_support_p
;
4795 enum vect_def_type dt
[3]
4796 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4797 gimple
*new_stmt
= NULL
;
4798 stmt_vec_info prev_stmt_info
;
4804 vec
<tree
> vec_oprnds0
= vNULL
;
4805 vec
<tree
> vec_oprnds1
= vNULL
;
4806 vec
<tree
> vec_oprnds2
= vNULL
;
4807 tree vop0
, vop1
, vop2
;
4808 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4809 vec_info
*vinfo
= stmt_info
->vinfo
;
4812 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4815 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4819 /* Is STMT a vectorizable binary/unary operation? */
4820 if (!is_gimple_assign (stmt
))
4823 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4826 code
= gimple_assign_rhs_code (stmt
);
4828 /* For pointer addition, we should use the normal plus for
4829 the vector addition. */
4830 if (code
== POINTER_PLUS_EXPR
)
  /* Support only unary, binary and ternary operations.  */
4834 op_type
= TREE_CODE_LENGTH (code
);
4835 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4837 if (dump_enabled_p ())
4838 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4839 "num. args = %d (not unary/binary/ternary op).\n",
4844 scalar_dest
= gimple_assign_lhs (stmt
);
4845 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4850 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4851 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
      /* Exceptions are bitwise binary operations.  */
4853 && code
!= BIT_IOR_EXPR
4854 && code
!= BIT_XOR_EXPR
4855 && code
!= BIT_AND_EXPR
)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4859 "bit-precision arithmetic not supported.\n");
4863 op0
= gimple_assign_rhs1 (stmt
);
4864 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4866 if (dump_enabled_p ())
4867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4868 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */

      /* For boolean type we cannot determine vectype by
         invariant value (don't know whether it is a vector
         of booleans or vector of integers).  We use output
         vectype because operations on boolean don't change
         type.  */
4880 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
4882 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4886 "not supported operation on bool value.\n");
4889 vectype
= vectype_out
;
4892 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4895 gcc_assert (vectype
);
4898 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4901 "no vectype for scalar type ");
4902 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4904 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4910 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4911 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4912 if (nunits_out
!= nunits_in
)
4915 if (op_type
== binary_op
|| op_type
== ternary_op
)
4917 op1
= gimple_assign_rhs2 (stmt
);
4918 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4922 "use not simple.\n");
4926 if (op_type
== ternary_op
)
4928 op2
= gimple_assign_rhs3 (stmt
);
4929 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
4931 if (dump_enabled_p ())
4932 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4933 "use not simple.\n");
4939 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4949 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4951 gcc_assert (ncopies
>= 1);
4953 /* Shifts are handled in vectorizable_shift (). */
4954 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4955 || code
== RROTATE_EXPR
)
4958 /* Supportable by target? */
4960 vec_mode
= TYPE_MODE (vectype
);
4961 if (code
== MULT_HIGHPART_EXPR
)
4962 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
4965 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4968 if (dump_enabled_p ())
4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4973 target_support_p
= (optab_handler (optab
, vec_mode
)
4974 != CODE_FOR_nothing
);
4977 if (!target_support_p
)
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4981 "op not supported by target.\n");
4982 /* Check only during analysis. */
4983 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4984 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_NOTE
, vect_location
,
4988 "proceeding using word mode.\n");
4991 /* Worthwhile without SIMD support? Check only during analysis. */
4992 if (!VECTOR_MODE_P (vec_mode
)
4994 && vf
< vect_min_worthwhile_factor (code
))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4998 "not worthwhile without SIMD support.\n");
5002 if (!vec_stmt
) /* transformation not required. */
5004 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5005 if (dump_enabled_p ())
5006 dump_printf_loc (MSG_NOTE
, vect_location
,
5007 "=== vectorizable_operation ===\n");
5008 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5014 if (dump_enabled_p ())
5015 dump_printf_loc (MSG_NOTE
, vect_location
,
5016 "transform binary/unary operation.\n");
5019 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
5074 prev_stmt_info
= NULL
;
5075 for (j
= 0; j
< ncopies
; j
++)
5080 if (op_type
== binary_op
|| op_type
== ternary_op
)
5081 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5084 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5086 if (op_type
== ternary_op
)
5088 vec_oprnds2
.create (1);
5089 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
5095 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5096 if (op_type
== ternary_op
)
5098 tree vec_oprnd
= vec_oprnds2
.pop ();
5099 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5104 /* Arguments are ready. Create the new vector stmt. */
5105 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5107 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5108 ? vec_oprnds1
[i
] : NULL_TREE
);
5109 vop2
= ((op_type
== ternary_op
)
5110 ? vec_oprnds2
[i
] : NULL_TREE
);
5111 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5112 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5113 gimple_assign_set_lhs (new_stmt
, new_temp
);
5114 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5116 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5123 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5125 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5126 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5129 vec_oprnds0
.release ();
5130 vec_oprnds1
.release ();
5131 vec_oprnds2
.release ();
5136 /* A helper function to ensure data reference DR's base alignment
5140 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5145 if (DR_VECT_AUX (dr
)->base_misaligned
)
5147 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5148 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5150 if (decl_in_symtab_p (base_decl
))
5151 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5154 SET_DECL_ALIGN (base_decl
, TYPE_ALIGN (vectype
));
5155 DECL_USER_ALIGN (base_decl
) = 1;
5157 DR_VECT_AUX (dr
)->base_misaligned
= false;
/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */
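/* For example (a hypothetical 4-element case): the reversal of a V4SI
   vector uses the selector {3, 2, 1, 0}, i.e. the mask built below would
   drive a permute of the form

     vect_r.3 = VEC_PERM_EXPR <vect_x.2, vect_x.2, { 3, 2, 1, 0 }>;

   which is how, e.g., vectorizable_store uses it for negative-step
   accesses.  */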
5167 perm_mask_for_reverse (tree vectype
)
5172 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5173 sel
= XALLOCAVEC (unsigned char, nunits
);
5175 for (i
= 0; i
< nunits
; ++i
)
5176 sel
[i
] = nunits
- 1 - i
;
5178 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5180 return vect_gen_perm_mask_checked (vectype
, sel
);
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
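/* A hypothetical illustration (not from the original sources): the simplest
   case, a unit-stride store

     for (i = 0; i < n; i++)
       a[i] = x;

   is turned, per copy, into a single aligned (or explicitly misaligned)
   vector store of a broadcast right-hand side:

     MEM[(int *)vectp_a.12] = vect_cst_.11;

   the grouped, strided and scatter cases handled below each get their own
   explanatory comment at the point where they are coded.  */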
5192 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5198 tree vec_oprnd
= NULL_TREE
;
5199 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5200 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5202 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5203 struct loop
*loop
= NULL
;
5204 machine_mode vec_mode
;
5206 enum dr_alignment_support alignment_support_scheme
;
5208 enum vect_def_type dt
;
5209 stmt_vec_info prev_stmt_info
= NULL
;
5210 tree dataref_ptr
= NULL_TREE
;
5211 tree dataref_offset
= NULL_TREE
;
5212 gimple
*ptr_incr
= NULL
;
5215 gimple
*next_stmt
, *first_stmt
= NULL
;
5216 bool grouped_store
= false;
5217 bool store_lanes_p
= false;
5218 unsigned int group_size
, i
;
5219 vec
<tree
> dr_chain
= vNULL
;
5220 vec
<tree
> oprnds
= vNULL
;
5221 vec
<tree
> result_chain
= vNULL
;
5223 bool negative
= false;
5224 tree offset
= NULL_TREE
;
5225 vec
<tree
> vec_oprnds
= vNULL
;
5226 bool slp
= (slp_node
!= NULL
);
5227 unsigned int vec_num
;
5228 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5229 vec_info
*vinfo
= stmt_info
->vinfo
;
5231 tree scatter_base
= NULL_TREE
, scatter_off
= NULL_TREE
;
5232 tree scatter_off_vectype
= NULL_TREE
, scatter_decl
= NULL_TREE
;
5233 int scatter_scale
= 1;
5234 enum vect_def_type scatter_idx_dt
= vect_unknown_def_type
;
5235 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5239 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5242 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5246 /* Is vectorizable store? */
5248 if (!is_gimple_assign (stmt
))
5251 scalar_dest
= gimple_assign_lhs (stmt
);
5252 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5253 && is_pattern_stmt_p (stmt_info
))
5254 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5255 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5256 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5257 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5258 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5259 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5260 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5261 && TREE_CODE (scalar_dest
) != MEM_REF
)
  /* Cannot have hybrid store SLP -- that would mean storing to the
     same location twice.  */
5266 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5268 gcc_assert (gimple_assign_single_p (stmt
));
5270 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5271 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5275 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5276 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
5287 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5289 gcc_assert (ncopies
>= 1);
5291 /* FORNOW. This restriction should be relaxed. */
5292 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5294 if (dump_enabled_p ())
5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5296 "multiple types in nested loop.\n");
5300 op
= gimple_assign_rhs1 (stmt
);
5302 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5306 "use not simple.\n");
5310 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5313 elem_type
= TREE_TYPE (vectype
);
5314 vec_mode
= TYPE_MODE (vectype
);
5316 /* FORNOW. In some cases can vectorize even if data-type not supported
5317 (e.g. - array initialization with 0). */
5318 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5321 if (!STMT_VINFO_DATA_REF (stmt_info
))
5324 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5327 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5328 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5329 size_zero_node
) < 0;
5330 if (negative
&& ncopies
> 1)
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5334 "multiple types with negative step.\n");
5339 gcc_assert (!grouped_store
);
5340 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5341 if (alignment_support_scheme
!= dr_aligned
5342 && alignment_support_scheme
!= dr_unaligned_supported
)
5344 if (dump_enabled_p ())
5345 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5346 "negative step but alignment required.\n");
5349 if (dt
!= vect_constant_def
5350 && dt
!= vect_external_def
5351 && !perm_mask_for_reverse (vectype
))
5353 if (dump_enabled_p ())
5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5355 "negative step and reversing not supported.\n");
5361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5363 grouped_store
= true;
5364 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5365 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5366 if (!slp
&& !STMT_VINFO_STRIDED_P (stmt_info
))
5368 if (vect_store_lanes_supported (vectype
, group_size
))
5369 store_lanes_p
= true;
5370 else if (!vect_grouped_store_supported (vectype
, group_size
))
5374 if (first_stmt
== stmt
)
5376 /* STMT is the leader of the group. Check the operands of all the
5377 stmts of the group. */
5378 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5381 gcc_assert (gimple_assign_single_p (next_stmt
));
5382 op
= gimple_assign_rhs1 (next_stmt
);
5383 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5387 "use not simple.\n");
5390 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5395 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5398 scatter_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &scatter_base
,
5399 &scatter_off
, &scatter_scale
);
5400 gcc_assert (scatter_decl
);
5401 if (!vect_is_simple_use (scatter_off
, vinfo
, &def_stmt
, &scatter_idx_dt
,
5402 &scatter_off_vectype
))
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5406 "scatter index use not simple.");
5411 if (!vec_stmt
) /* transformation not required. */
5413 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5414 /* The SLP costs are calculated during SLP analysis. */
5415 if (!PURE_SLP_STMT (stmt_info
))
5416 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5423 ensure_base_align (stmt_info
, dr
);
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5427 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5428 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (scatter_decl
));
5429 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5430 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5431 edge pe
= loop_preheader_edge (loop
);
5434 enum { NARROW
, NONE
, WIDEN
} modifier
;
5435 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (scatter_off_vectype
);
5437 if (nunits
== (unsigned int) scatter_off_nunits
)
5439 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5441 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5444 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5445 sel
[i
] = i
| nunits
;
5447 perm_mask
= vect_gen_perm_mask_checked (scatter_off_vectype
, sel
);
5448 gcc_assert (perm_mask
!= NULL_TREE
);
5450 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5452 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5455 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5456 sel
[i
] = i
| scatter_off_nunits
;
5458 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5459 gcc_assert (perm_mask
!= NULL_TREE
);
5465 rettype
= TREE_TYPE (TREE_TYPE (scatter_decl
));
5466 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5467 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5468 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5469 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5470 scaletype
= TREE_VALUE (arglist
);
5472 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5473 && TREE_CODE (rettype
) == VOID_TYPE
);
5475 ptr
= fold_convert (ptrtype
, scatter_base
);
5476 if (!is_gimple_min_invariant (ptr
))
5478 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5479 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5480 gcc_assert (!new_bb
);
      /* Currently we support only unconditional scatter stores,
         so mask should be all ones.  */
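      /* For reference (paraphrasing the code below, not new behaviour): the
         emitted call has the shape

           scatter_decl (ptr, mask, idx, src, scale);

         where MASK is the all-ones constant built here, IDX the (possibly
         permuted) vector of offsets and SRC the (possibly permuted) vector
         of stored values.  */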
5485 mask
= build_int_cst (masktype
, -1);
5486 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5488 scale
= build_int_cst (scaletype
, scatter_scale
);
5490 prev_stmt_info
= NULL
;
5491 for (j
= 0; j
< ncopies
; ++j
)
5496 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5498 = vect_get_vec_def_for_operand (scatter_off
, stmt
);
5500 else if (modifier
!= NONE
&& (j
& 1))
5502 if (modifier
== WIDEN
)
5505 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5506 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5509 else if (modifier
== NARROW
)
5511 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5514 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5522 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5524 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5527 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5529 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5530 == TYPE_VECTOR_SUBPARTS (srctype
));
5531 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5532 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5533 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5534 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5538 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5540 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5541 == TYPE_VECTOR_SUBPARTS (idxtype
));
5542 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5543 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5544 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5545 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5550 = gimple_build_call (scatter_decl
, 5, ptr
, mask
, op
, src
, scale
);
5552 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5554 if (prev_stmt_info
== NULL
)
5555 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5557 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5558 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5565 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5566 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5568 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5571 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
5575 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5576 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5585 grouped_store
= false;
5586 /* VEC_NUM is the number of vect stmts to be created for this
5588 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5589 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5590 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5591 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5592 op
= gimple_assign_rhs1 (first_stmt
);
5595 /* VEC_NUM is the number of vect stmts to be created for this
5597 vec_num
= group_size
;
5603 group_size
= vec_num
= 1;
5606 if (dump_enabled_p ())
5607 dump_printf_loc (MSG_NOTE
, vect_location
,
5608 "transform store. ncopies = %d\n", ncopies
);
5610 if (STMT_VINFO_STRIDED_P (stmt_info
))
5612 gimple_stmt_iterator incr_gsi
;
5618 gimple_seq stmts
= NULL
;
5619 tree stride_base
, stride_step
, alias_off
;
5623 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5626 = fold_build_pointer_plus
5627 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5628 size_binop (PLUS_EXPR
,
5629 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5630 convert_to_ptrofftype (DR_INIT(first_dr
))));
5631 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
       */
5651 unsigned nstores
= nunits
;
5653 tree ltype
= elem_type
;
5656 if (group_size
< nunits
5657 && nunits
% group_size
== 0)
5659 nstores
= nunits
/ group_size
;
5661 ltype
= build_vector_type (elem_type
, group_size
);
5663 else if (group_size
>= nunits
5664 && group_size
% nunits
== 0)
5670 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5671 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5674 ivstep
= stride_step
;
5675 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5676 build_int_cst (TREE_TYPE (ivstep
), vf
));
5678 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5680 create_iv (stride_base
, ivstep
, NULL
,
5681 loop
, &incr_gsi
, insert_after
,
5683 incr
= gsi_stmt (incr_gsi
);
5684 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5686 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5688 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5690 prev_stmt_info
= NULL
;
5691 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5692 next_stmt
= first_stmt
;
5693 for (g
= 0; g
< group_size
; g
++)
5695 running_off
= offvar
;
5698 tree size
= TYPE_SIZE_UNIT (ltype
);
5699 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5701 tree newoff
= copy_ssa_name (running_off
, NULL
);
5702 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5704 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5705 running_off
= newoff
;
5707 unsigned int group_el
= 0;
5708 unsigned HOST_WIDE_INT
5709 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
5710 for (j
= 0; j
< ncopies
; j
++)
5712 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5713 and first_stmt == stmt. */
5718 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5720 vec_oprnd
= vec_oprnds
[0];
5724 gcc_assert (gimple_assign_single_p (next_stmt
));
5725 op
= gimple_assign_rhs1 (next_stmt
);
5726 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5732 vec_oprnd
= vec_oprnds
[j
];
5735 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5736 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5740 for (i
= 0; i
< nstores
; i
++)
5742 tree newref
, newoff
;
5743 gimple
*incr
, *assign
;
5744 tree size
= TYPE_SIZE (ltype
);
5745 /* Extract the i'th component. */
5746 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5747 bitsize_int (i
), size
);
5748 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5751 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5755 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
5757 newref
= build2 (MEM_REF
, ltype
,
5758 running_off
, this_off
);
5760 /* And store it to *running_off. */
5761 assign
= gimple_build_assign (newref
, elem
);
5762 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5766 || group_el
== group_size
)
5768 newoff
= copy_ssa_name (running_off
, NULL
);
5769 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5770 running_off
, stride_step
);
5771 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5773 running_off
= newoff
;
5776 if (g
== group_size
- 1
5779 if (j
== 0 && i
== 0)
5780 STMT_VINFO_VEC_STMT (stmt_info
)
5781 = *vec_stmt
= assign
;
5783 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5784 prev_stmt_info
= vinfo_for_stmt (assign
);
5788 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5795 dr_chain
.create (group_size
);
5796 oprnds
.create (group_size
);
5798 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5799 gcc_assert (alignment_support_scheme
);
      /* Targets with store-lane instructions must not require explicit
         realignment.  */
5802 gcc_assert (!store_lanes_p
5803 || alignment_support_scheme
== dr_aligned
5804 || alignment_support_scheme
== dr_unaligned_supported
);
5807 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5810 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5812 aggr_type
= vectype
;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
5853 prev_stmt_info
= NULL
;
5854 for (j
= 0; j
< ncopies
; j
++)
5861 /* Get vectorized arguments for SLP_NODE. */
5862 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5863 NULL
, slp_node
, -1);
5865 vec_oprnd
= vec_oprnds
[0];
          /* For interleaved stores we collect vectorized defs for all the
             stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
             used as an input to vect_permute_store_chain(), and OPRNDS as
             an input to vect_get_vec_def_for_stmt_copy() for the next copy.

             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
5876 next_stmt
= first_stmt
;
5877 for (i
= 0; i
< group_size
; i
++)
              /* Since gaps are not supported for interleaved stores,
                 GROUP_SIZE is the exact number of stmts in the chain.
                 Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                 there is no interleaving, GROUP_SIZE is 1, and only one
                 iteration of the loop will be executed.  */
5884 gcc_assert (next_stmt
5885 && gimple_assign_single_p (next_stmt
));
5886 op
= gimple_assign_rhs1 (next_stmt
);
5888 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5889 dr_chain
.quick_push (vec_oprnd
);
5890 oprnds
.quick_push (vec_oprnd
);
5891 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
          /* We should have caught mismatched types earlier.  */
5896 gcc_assert (useless_type_conversion_p (vectype
,
5897 TREE_TYPE (vec_oprnd
)));
5898 bool simd_lane_access_p
5899 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5900 if (simd_lane_access_p
5901 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5902 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5903 && integer_zerop (DR_OFFSET (first_dr
))
5904 && integer_zerop (DR_INIT (first_dr
))
5905 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5906 get_alias_set (DR_REF (first_dr
))))
5908 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5909 dataref_offset
= build_int_cst (reference_alias_ptr_type
5910 (DR_REF (first_dr
)), 0);
5915 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5916 simd_lane_access_p
? loop
: NULL
,
5917 offset
, &dummy
, gsi
, &ptr_incr
,
5918 simd_lane_access_p
, &inv_p
);
5919 gcc_assert (bb_vinfo
|| !inv_p
);
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.

             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
5930 for (i
= 0; i
< group_size
; i
++)
5933 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
5934 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5935 dr_chain
[i
] = vec_oprnd
;
5936 oprnds
[i
] = vec_oprnd
;
5940 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5941 TYPE_SIZE_UNIT (aggr_type
));
5943 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5944 TYPE_SIZE_UNIT (aggr_type
));
5951 /* Combine all the vectors into an array. */
5952 vec_array
= create_vector_array (vectype
, vec_num
);
5953 for (i
= 0; i
< vec_num
; i
++)
5955 vec_oprnd
= dr_chain
[i
];
5956 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
5961 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5962 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5963 gimple_call_set_lhs (new_stmt
, data_ref
);
5964 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5972 result_chain
.create (group_size
);
5974 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5978 next_stmt
= first_stmt
;
5979 for (i
= 0; i
< vec_num
; i
++)
5981 unsigned align
, misalign
;
5984 /* Bump the vector pointer. */
5985 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5989 vec_oprnd
= vec_oprnds
[i
];
5990 else if (grouped_store
)
5991 /* For grouped stores vectorized defs are interleaved in
5992 vect_permute_store_chain(). */
5993 vec_oprnd
= result_chain
[i
];
5995 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
5999 : build_int_cst (reference_alias_ptr_type
6000 (DR_REF (first_dr
)), 0));
6001 align
= TYPE_ALIGN_UNIT (vectype
);
6002 if (aligned_access_p (first_dr
))
6004 else if (DR_MISALIGNMENT (first_dr
) == -1)
6006 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6007 align
= TYPE_ALIGN_UNIT (elem_type
);
6009 align
= get_object_alignment (DR_REF (first_dr
))
6012 TREE_TYPE (data_ref
)
6013 = build_aligned_type (TREE_TYPE (data_ref
),
6014 align
* BITS_PER_UNIT
);
6018 TREE_TYPE (data_ref
)
6019 = build_aligned_type (TREE_TYPE (data_ref
),
6020 TYPE_ALIGN (elem_type
));
6021 misalign
= DR_MISALIGNMENT (first_dr
);
6023 if (dataref_offset
== NULL_TREE
6024 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6025 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6029 && dt
!= vect_constant_def
6030 && dt
!= vect_external_def
)
6032 tree perm_mask
= perm_mask_for_reverse (vectype
);
6034 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6036 tree new_temp
= make_ssa_name (perm_dest
);
6038 /* Generate the permute statement. */
6040 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6041 vec_oprnd
, perm_mask
);
6042 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6044 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6045 vec_oprnd
= new_temp
;
6048 /* Arguments are ready. Create the new vector stmt. */
6049 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6050 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6055 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6063 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6065 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6066 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6070 dr_chain
.release ();
6072 result_chain
.release ();
6073 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */
6084 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6086 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
6089 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6091 mask_elt_type
= lang_hooks
.types
.type_for_mode
6092 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
6093 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6095 mask_elts
= XALLOCAVEC (tree
, nunits
);
6096 for (i
= nunits
- 1; i
>= 0; i
--)
6097 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
6098 mask_vec
= build_vector (mask_type
, mask_elts
);
6103 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6104 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6107 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
6109 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6110 return vect_gen_perm_mask_any (vectype
, sel
);
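/* A hedged usage sketch (the caller here is hypothetical): building an
   even-element selector for an 8-element vector and turning it into a mask
   constant:

     unsigned char sel[8];
     for (unsigned int i = 0; i < 8; ++i)
       sel[i] = i * 2;
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   vect_gen_perm_mask_checked asserts that the target can permute with this
   selector; use vect_gen_perm_mask_any plus an explicit can_vec_perm_p test
   when failure has to be handled gracefully.  */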
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */
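/* For example (hypothetical values): with X = {a, b, c, d},
   Y = {e, f, g, h} and MASK_VEC = {0, 4, 1, 5}, the statement built below,

     data_ref = VEC_PERM_EXPR <x, y, {0, 4, 1, 5}>;

   yields {a, e, b, f}: selector values below NUNITS index into X, the rest
   index into Y.  */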
6119 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6120 gimple_stmt_iterator
*gsi
)
6122 tree vectype
= TREE_TYPE (x
);
6123 tree perm_dest
, data_ref
;
6126 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6127 data_ref
= make_ssa_name (perm_dest
);
6129 /* Generate the permute statement. */
6130 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6131 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */
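/* A hypothetical illustration (not from the original sources): for an
   invariant load whose address computation sits inside the loop,

     loop:
       off_3 = base_1 + 16;
       x_4 = *off_3;            <-- STMT the caller wants to hoist

   the definition off_3 is itself moved to the preheader first, so that
   hoisting x_4 (done by the caller) does not leave a use before its def.  */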
6142 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6148 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6150 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6151 if (!gimple_nop_p (def_stmt
)
6152 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
      /* Make sure we don't need to recurse.  While we could do
         so in simple cases, when there are more complex use webs
         we don't have an easy way to preserve stmt order to fulfil
         dependencies within them.  */
6160 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6162 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6164 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6165 if (!gimple_nop_p (def_stmt2
)
6166 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6176 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6178 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6179 if (!gimple_nop_p (def_stmt
)
6180 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6182 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6183 gsi_remove (&gsi
, false);
6184 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
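/* A hypothetical illustration (not part of the original sources): the
   simplest case, a unit-stride load

     for (i = 0; i < n; i++)
       ... = a[i];

   becomes, per copy, a single vector load

     vect__4.8 = MEM[(int *)vectp_a.6];

   grouped, strided, gather and realignment variants are handled further
   down, each with its own comment.  */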
6200 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6201 slp_tree slp_node
, slp_instance slp_node_instance
)
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  gimple *new_stmt = NULL;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int i, j, group_size = -1, group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple *first_stmt_for_drptr = NULL;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  vec_info *vinfo = stmt_info->vinfo;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
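
  /* Worked example (numbers assumed for exposition): with a vectorization
     factor of 8 and a 4-element vectype, ncopies = 8 / 4 = 2, so every
     scalar load below is replaced by two vector loads chained through
     STMT_VINFO_RELATED_STMT.  */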
6302 /* FORNOW. This restriction should be relaxed. */
6303 if (nested_in_vect_loop
&& ncopies
> 1)
6305 if (dump_enabled_p ())
6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6307 "multiple types in nested loop.\n");
6311 /* Invalidate assumptions made by dependence analysis when vectorization
6312 on the unrolled body effectively re-orders stmts. */
6314 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6315 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6316 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6318 if (dump_enabled_p ())
6319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6320 "cannot perform implicit CSE when unrolling "
6321 "with negative dependence distance\n");
6325 elem_type
= TREE_TYPE (vectype
);
6326 mode
= TYPE_MODE (vectype
);
6328 /* FORNOW. In some cases can vectorize even if data-type not supported
6329 (e.g. - data copies). */
6330 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6334 "Aligned load, but unsupported type.\n");
6338 /* Check if the load is a part of an interleaving chain. */
6339 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6341 grouped_load
= true;
6343 gcc_assert (!nested_in_vect_loop
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6345 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6346 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6348 if (!slp
&& !STMT_VINFO_STRIDED_P (stmt_info
))
6350 if (vect_load_lanes_supported (vectype
, group_size
))
6351 load_lanes_p
= true;
6352 else if (!vect_grouped_load_supported (vectype
, group_size
))
	  /* If this is single-element interleaving with an element distance
	     that leaves unused vector loads around, punt - we at least create
	     very sub-optimal code in that case (and blow up memory).  */
6360 if (first_stmt
== stmt
6361 && !GROUP_NEXT_ELEMENT (stmt_info
))
6363 if (GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
6365 if (dump_enabled_p ())
6366 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6367 "single-element interleaving not supported "
6368 "for not adjacent vector loads\n");
	  /* Single-element interleaving requires peeling for gaps.  */
	  gcc_assert (GROUP_GAP (stmt_info));

	  /* If there is a gap at the end of the group or the group size cannot
	     be made a multiple of the vector element count then we access
	     excess elements in the last iteration and thus need to peel
	     that off.  */
6380 && ! STMT_VINFO_STRIDED_P (stmt_info
)
6381 && (GROUP_GAP (vinfo_for_stmt (first_stmt
)) != 0
6382 || (!slp
&& !load_lanes_p
&& vf
% group_size
!= 0)))
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6386 "Data access with gaps requires scalar "
6390 if (dump_enabled_p ())
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6392 "Peeling for outer loop is not supported\n");
6396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
6399 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
      /* ??? The following is overly pessimistic (as well as the loop
	 case above) in the case we can statically determine the excess
	 elements loaded are within the bounds of a decl that is accessed.
	 Likewise for BB vectorization, using masked loads is a possibility.  */
6406 if (bb_vinfo
&& slp_perm
&& group_size
% nunits
!= 0)
6408 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6409 "BB vectorization with gaps at the end of a load "
6410 "is not supported\n");
6414 /* Invalidate assumptions made by dependence analysis when vectorization
6415 on the unrolled body effectively re-orders stmts. */
6416 if (!PURE_SLP_STMT (stmt_info
)
6417 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6418 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6419 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6423 "cannot perform implicit CSE when performing "
6424 "group loads with negative dependence distance\n");
6428 /* Similarly when the stmt is a load that is both part of a SLP
6429 instance and a loop vectorized stmt via the same-dr mechanism
6430 we have to give up. */
6431 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6432 && (STMT_SLP_TYPE (stmt_info
)
6433 != STMT_SLP_TYPE (vinfo_for_stmt
6434 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6436 if (dump_enabled_p ())
6437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6438 "conflicting SLP types for CSEd load\n");
6444 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6447 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
6448 &gather_off
, &gather_scale
);
6449 gcc_assert (gather_decl
);
6450 if (!vect_is_simple_use (gather_off
, vinfo
, &def_stmt
, &gather_dt
,
6451 &gather_off_vectype
))
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6455 "gather index use not simple.\n");
6459 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6463 negative
= tree_int_cst_compare (nested_in_vect_loop
6464 ? STMT_VINFO_DR_STEP (stmt_info
)
6466 size_zero_node
) < 0;
6467 if (negative
&& ncopies
> 1)
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6471 "multiple types with negative step.\n");
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6481 "negative step for group load not supported"
6485 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6486 if (alignment_support_scheme
!= dr_aligned
6487 && alignment_support_scheme
!= dr_unaligned_supported
)
6489 if (dump_enabled_p ())
6490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6491 "negative step but alignment required.\n");
6494 if (!perm_mask_for_reverse (vectype
))
6496 if (dump_enabled_p ())
6497 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6498 "negative step and reversing not supported."
6505 if (!vec_stmt
) /* transformation not required. */
6507 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6508 /* The SLP costs are calculated during SLP analysis. */
6509 if (!PURE_SLP_STMT (stmt_info
))
6510 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6515 if (dump_enabled_p ())
6516 dump_printf_loc (MSG_NOTE
, vect_location
,
6517 "transform load. ncopies = %d\n", ncopies
);
6521 ensure_base_align (stmt_info
, dr
);
6523 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6525 tree vec_oprnd0
= NULL_TREE
, op
;
6526 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6527 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6528 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6529 edge pe
= loop_preheader_edge (loop
);
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
	modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
	  modifier = WIDEN;

	  for (i = 0; i < gather_off_nunits; ++i)
	    sel[i] = i | nunits;

	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
	}
      else if (nunits == gather_off_nunits * 2)
	{
	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
	  modifier = NARROW;

	  for (i = 0; i < nunits; ++i)
	    sel[i] = i < gather_off_nunits
		     ? i : i + nunits - gather_off_nunits;

	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
	}
      else
	gcc_unreachable ();
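
      /* Assumed concrete values for illustration: with nunits = 4 and
	 gather_off_nunits = 8 (WIDEN) the selector is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, used on odd copies to extract the upper
	 half of the wide offset vector; with nunits = 8 and
	 gather_off_nunits = 4 (NARROW) it is { 0, 1, 2, 3, 8, 9, 10, 11 },
	 used to combine the low halves of two consecutive gather results
	 into one full data vector.  */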
6562 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6563 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6564 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6565 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6566 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6567 scaletype
= TREE_VALUE (arglist
);
6568 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6570 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6572 ptr
= fold_convert (ptrtype
, gather_base
);
6573 if (!is_gimple_min_invariant (ptr
))
6575 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6576 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6577 gcc_assert (!new_bb
);
6580 /* Currently we support only unconditional gather loads,
6581 so mask should be all ones. */
6582 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6583 mask
= build_int_cst (masktype
, -1);
6584 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6586 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6587 mask
= build_vector_from_val (masktype
, mask
);
6588 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6590 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6594 for (j
= 0; j
< 6; ++j
)
6596 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6597 mask
= build_real (TREE_TYPE (masktype
), r
);
6598 mask
= build_vector_from_val (masktype
, mask
);
6599 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
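
      /* Reader's note (an interpretation, not original text): the gather is
	 unconditional, so the mask built above encodes "load every lane" -
	 the integer all-ones value -1, or the same all-ones bit pattern
	 reinterpreted as a floating-point constant via real_from_target when
	 the target's gather builtin takes the mask in the element type of
	 the data vector.  */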
6604 scale
= build_int_cst (scaletype
, gather_scale
);
6606 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6607 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6608 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6612 for (j
= 0; j
< 6; ++j
)
6614 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6615 merge
= build_real (TREE_TYPE (rettype
), r
);
6619 merge
= build_vector_from_val (rettype
, merge
);
6620 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6622 prev_stmt_info
= NULL
;
6623 for (j
= 0; j
< ncopies
; ++j
)
6625 if (modifier
== WIDEN
&& (j
& 1))
6626 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6627 perm_mask
, stmt
, gsi
);
6630 = vect_get_vec_def_for_operand (gather_off
, stmt
);
6633 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6635 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6637 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6638 == TYPE_VECTOR_SUBPARTS (idxtype
));
6639 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6640 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6642 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6643 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6648 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6650 if (!useless_type_conversion_p (vectype
, rettype
))
6652 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6653 == TYPE_VECTOR_SUBPARTS (rettype
));
6654 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6655 gimple_call_set_lhs (new_stmt
, op
);
6656 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6657 var
= make_ssa_name (vec_dest
);
6658 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6660 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6664 var
= make_ssa_name (vec_dest
, new_stmt
);
6665 gimple_call_set_lhs (new_stmt
, var
);
6668 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6670 if (modifier
== NARROW
)
6677 var
= permute_vec_elements (prev_res
, var
,
6678 perm_mask
, stmt
, gsi
);
6679 new_stmt
= SSA_NAME_DEF_STMT (var
);
6682 if (prev_stmt_info
== NULL
)
6683 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6685 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6686 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6690 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6692 gimple_stmt_iterator incr_gsi
;
6698 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6699 gimple_seq stmts
= NULL
;
6700 tree stride_base
, stride_step
, alias_off
;
6702 gcc_assert (!nested_in_vect_loop
);
6704 if (slp
&& grouped_load
)
6705 first_dr
= STMT_VINFO_DATA_REF
6706 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6711 = fold_build_pointer_plus
6712 (DR_BASE_ADDRESS (first_dr
),
6713 size_binop (PLUS_EXPR
,
6714 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6715 convert_to_ptrofftype (DR_INIT (first_dr
))));
6716 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...};
	     ...  */
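
      /* As an assumed numeric instance (not from the original sources): with
	 VF = 4 and a 4-element vectype the new induction variable steps by
	 4*stride per vector iteration, and each result vector is assembled
	 from scalar (or small-vector) loads at offsets 0, stride, 2*stride
	 and 3*stride from the current IV value, collected into a CONSTRUCTOR
	 and materialized by vect_init_vector below.  */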
6734 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6735 build_int_cst (TREE_TYPE (stride_step
), vf
));
6737 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6739 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6740 loop
, &incr_gsi
, insert_after
,
6742 incr
= gsi_stmt (incr_gsi
);
6743 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6745 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6746 &stmts
, true, NULL_TREE
);
6748 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6750 prev_stmt_info
= NULL
;
6751 running_off
= offvar
;
6752 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6753 int nloads
= nunits
;
6755 tree ltype
= TREE_TYPE (vectype
);
6756 auto_vec
<tree
> dr_chain
;
6759 if (group_size
< nunits
6760 && nunits
% group_size
== 0)
6762 nloads
= nunits
/ group_size
;
6764 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6765 ltype
= build_aligned_type (ltype
,
6766 TYPE_ALIGN (TREE_TYPE (vectype
)));
6768 else if (group_size
>= nunits
6769 && group_size
% nunits
== 0)
6774 ltype
= build_aligned_type (ltype
,
6775 TYPE_ALIGN (TREE_TYPE (vectype
)));
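
	  /* Assumed illustration: with group_size = 2 and nunits = 8 the
	     code above plans nunits / group_size = 4 two-element sub-vector
	     loads per result vector, while with group_size = 8 and
	     nunits = 4 it loads whole vectors and leaves reordering to the
	     SLP permutation below.  */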
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    {
	      ncopies = (group_size * vf + nunits - 1) / nunits;
	      dr_chain.create (ncopies);
	    }
	  else
	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6789 unsigned HOST_WIDE_INT
6790 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6791 for (j
= 0; j
< ncopies
; j
++)
6794 vec_alloc (v
, nloads
);
6795 for (i
= 0; i
< nloads
; i
++)
6797 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6799 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6800 build2 (MEM_REF
, ltype
,
6801 running_off
, this_off
));
6802 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6804 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
6805 gimple_assign_lhs (new_stmt
));
6809 || group_el
== group_size
)
6811 tree newoff
= copy_ssa_name (running_off
);
6812 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6813 running_off
, stride_step
);
6814 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6816 running_off
= newoff
;
6822 tree vec_inv
= build_constructor (vectype
, v
);
6823 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6824 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6830 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6832 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6837 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6839 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6840 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6844 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6845 slp_node_instance
, false);
6851 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6852 /* For SLP vectorization we directly vectorize a subchain
6853 without permutation. */
6854 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6855 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6856 /* For BB vectorization always use the first stmt to base
6857 the data ref pointer on. */
6859 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6861 /* Check if the chain of loads is already vectorized. */
6862 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6863 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6864 ??? But we can only do so if there is exactly one
6865 as we have no way to get at the rest. Leave the CSE
6867 ??? With the group load eventually participating
6868 in multiple different permutations (having multiple
6869 slp nodes which refer to the same group) the CSE
6870 is even wrong code. See PR56270. */
6873 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6876 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6877 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    vec_num = (group_size * vf + nunits - 1) / nunits;
	  else
	    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  group_gap_adj = vf * group_size - nunits * vec_num;
	}
      else
	vec_num = group_size;
    }
  else
    group_size = vec_num = 1;
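
  /* Reader's note (an interpretation of the code above, not original text):
     GROUP_GAP_ADJ is the difference, in scalar elements, between what the
     VF copies of the group logically consume (vf * group_size) and what the
     generated vector loads actually read (nunits * vec_num); a non-zero
     value makes the pointer-bump code further below skip the corresponding
     number of elements after each group.  */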
6904 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6905 gcc_assert (alignment_support_scheme
);
6906 /* Targets with load-lane instructions must not require explicit
6908 gcc_assert (!load_lanes_p
6909 || alignment_support_scheme
== dr_aligned
6910 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information recorded in the RELATED_STMT field is used to vectorize
     stmt S2.

     In case of interleaving (non-unit grouped access), vectorized loads
     are created in the order of memory accesses, starting from the access
     of the first stmt of the chain:

        VS1: vx0 = &base
        VS2: vx1 = &base + vec_size*1
        VS3: vx3 = &base + vec_size*2
        VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

	p = initial_addr;
	loop {
	  p = p + indx * vectype_size;
	  vec_dest = *(p);
	}

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	msq_init = *(floor(p1))
	p2 = initial_addr + VS - 1;
	realignment_token = call target_builtin;
	loop {
	  p2 = p2 + indx * vectype_size
	  lsq = *(floor(p2))
	  vec_dest = realign_load (msq, lsq, realignment_token)
	  msq = lsq;
	}  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
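
  /* Assumed illustration of the realignment scheme: for 16-byte vectors and
     a pointer that is only 4-byte aligned, msq holds the 16 bytes at the
     rounded-down address and lsq the following 16 bytes; REALIGN_LOAD_EXPR
     then combines the two according to the misalignment communicated through
     realignment_token, recovering the intended unaligned 16 bytes.  */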
7015 if (nested_in_vect_loop
7016 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7017 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7019 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7020 compute_in_loop
= true;
7023 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7024 || alignment_support_scheme
== dr_explicit_realign
)
7025 && !compute_in_loop
)
7027 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7028 alignment_support_scheme
, NULL_TREE
,
7030 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7032 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7033 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7041 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7044 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7046 aggr_type
= vectype
;
7048 prev_stmt_info
= NULL
;
7049 for (j
= 0; j
< ncopies
; j
++)
7051 /* 1. Create the vector or array pointer update chain. */
7054 bool simd_lane_access_p
7055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7056 if (simd_lane_access_p
7057 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7058 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7059 && integer_zerop (DR_OFFSET (first_dr
))
7060 && integer_zerop (DR_INIT (first_dr
))
7061 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7062 get_alias_set (DR_REF (first_dr
)))
7063 && (alignment_support_scheme
== dr_aligned
7064 || alignment_support_scheme
== dr_unaligned_supported
))
7066 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7067 dataref_offset
= build_int_cst (reference_alias_ptr_type
7068 (DR_REF (first_dr
)), 0);
7071 else if (first_stmt_for_drptr
7072 && first_stmt
!= first_stmt_for_drptr
)
7075 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7076 at_loop
, offset
, &dummy
, gsi
,
7077 &ptr_incr
, simd_lane_access_p
,
7078 &inv_p
, byte_offset
);
7079 /* Adjust the pointer by the difference to first_stmt. */
7080 data_reference_p ptrdr
7081 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7082 tree diff
= fold_convert (sizetype
,
7083 size_binop (MINUS_EXPR
,
7086 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7091 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7092 offset
, &dummy
, gsi
, &ptr_incr
,
7093 simd_lane_access_p
, &inv_p
,
7096 else if (dataref_offset
)
7097 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7098 TYPE_SIZE_UNIT (aggr_type
));
7100 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7101 TYPE_SIZE_UNIT (aggr_type
));
7103 if (grouped_load
|| slp_perm
)
7104 dr_chain
.create (vec_num
);
7110 vec_array
= create_vector_array (vectype
, vec_num
);
7113 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7114 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
7115 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7116 gimple_call_set_lhs (new_stmt
, vec_array
);
7117 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7119 /* Extract each vector into an SSA_NAME. */
7120 for (i
= 0; i
< vec_num
; i
++)
7122 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7124 dr_chain
.quick_push (new_temp
);
7127 /* Record the mapping between SSA_NAMEs and statements. */
7128 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7132 for (i
= 0; i
< vec_num
; i
++)
7135 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7138 /* 2. Create the vector-load in the loop. */
7139 switch (alignment_support_scheme
)
7142 case dr_unaligned_supported
:
7144 unsigned int align
, misalign
;
7147 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7150 : build_int_cst (reference_alias_ptr_type
7151 (DR_REF (first_dr
)), 0));
7152 align
= TYPE_ALIGN_UNIT (vectype
);
7153 if (alignment_support_scheme
== dr_aligned
)
7155 gcc_assert (aligned_access_p (first_dr
));
7158 else if (DR_MISALIGNMENT (first_dr
) == -1)
7160 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7161 align
= TYPE_ALIGN_UNIT (elem_type
);
7163 align
= (get_object_alignment (DR_REF (first_dr
))
7166 TREE_TYPE (data_ref
)
7167 = build_aligned_type (TREE_TYPE (data_ref
),
7168 align
* BITS_PER_UNIT
);
7172 TREE_TYPE (data_ref
)
7173 = build_aligned_type (TREE_TYPE (data_ref
),
7174 TYPE_ALIGN (elem_type
));
7175 misalign
= DR_MISALIGNMENT (first_dr
);
7177 if (dataref_offset
== NULL_TREE
7178 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7179 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7183 case dr_explicit_realign
:
7187 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7189 if (compute_in_loop
)
7190 msq
= vect_setup_realignment (first_stmt
, gsi
,
7192 dr_explicit_realign
,
7195 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7196 ptr
= copy_ssa_name (dataref_ptr
);
7198 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7199 new_stmt
= gimple_build_assign
7200 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7202 (TREE_TYPE (dataref_ptr
),
7203 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7204 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7206 = build2 (MEM_REF
, vectype
, ptr
,
7207 build_int_cst (reference_alias_ptr_type
7208 (DR_REF (first_dr
)), 0));
7209 vec_dest
= vect_create_destination_var (scalar_dest
,
7211 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7212 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7213 gimple_assign_set_lhs (new_stmt
, new_temp
);
7214 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7215 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7216 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7219 bump
= size_binop (MULT_EXPR
, vs
,
7220 TYPE_SIZE_UNIT (elem_type
));
7221 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7222 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7223 new_stmt
= gimple_build_assign
7224 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7227 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7228 ptr
= copy_ssa_name (ptr
, new_stmt
);
7229 gimple_assign_set_lhs (new_stmt
, ptr
);
7230 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7232 = build2 (MEM_REF
, vectype
, ptr
,
7233 build_int_cst (reference_alias_ptr_type
7234 (DR_REF (first_dr
)), 0));
7237 case dr_explicit_realign_optimized
:
7238 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7239 new_temp
= copy_ssa_name (dataref_ptr
);
7241 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7242 new_stmt
= gimple_build_assign
7243 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7245 (TREE_TYPE (dataref_ptr
),
7246 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7247 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7249 = build2 (MEM_REF
, vectype
, new_temp
,
7250 build_int_cst (reference_alias_ptr_type
7251 (DR_REF (first_dr
)), 0));
7256 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7257 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7258 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7259 gimple_assign_set_lhs (new_stmt
, new_temp
);
7260 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7262 /* 3. Handle explicit realignment if necessary/supported.
7264 vec_dest = realign_load (msq, lsq, realignment_token) */
7265 if (alignment_support_scheme
== dr_explicit_realign_optimized
7266 || alignment_support_scheme
== dr_explicit_realign
)
7268 lsq
= gimple_assign_lhs (new_stmt
);
7269 if (!realignment_token
)
7270 realignment_token
= dataref_ptr
;
7271 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7272 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7273 msq
, lsq
, realignment_token
);
7274 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7275 gimple_assign_set_lhs (new_stmt
, new_temp
);
7276 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7278 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7281 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7282 add_phi_arg (phi
, lsq
,
7283 loop_latch_edge (containing_loop
),
7289 /* 4. Handle invariant-load. */
7290 if (inv_p
&& !bb_vinfo
)
7292 gcc_assert (!grouped_load
);
7293 /* If we have versioned for aliasing or the loop doesn't
7294 have any data dependencies that would preclude this,
7295 then we are sure this is a loop invariant load and
7296 thus we can insert it on the preheader edge. */
7297 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7298 && !nested_in_vect_loop
7299 && hoist_defs_of_uses (stmt
, loop
))
7301 if (dump_enabled_p ())
7303 dump_printf_loc (MSG_NOTE
, vect_location
,
7304 "hoisting out of the vectorized "
7306 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7308 tree tem
= copy_ssa_name (scalar_dest
);
7309 gsi_insert_on_edge_immediate
7310 (loop_preheader_edge (loop
),
7311 gimple_build_assign (tem
,
7313 (gimple_assign_rhs1 (stmt
))));
7314 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7315 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7316 set_vinfo_for_stmt (new_stmt
,
7317 new_stmt_vec_info (new_stmt
, vinfo
));
7321 gimple_stmt_iterator gsi2
= *gsi
;
7323 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7325 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
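
	      /* Sketch of the effect (assumed example): a loop-invariant
		 scalar load "x = *q" is either re-issued on the preheader
		 edge (when hoist_defs_of_uses succeeded) or left in place,
		 and in both cases vect_init_vector splats the scalar into a
		 vector { x, x, ..., x } that stands in for the per-iteration
		 vector loads.  */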
7331 tree perm_mask
= perm_mask_for_reverse (vectype
);
7332 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7333 perm_mask
, stmt
, gsi
);
7334 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7337 /* Collect vector loads and later create their permutation in
7338 vect_transform_grouped_load (). */
7339 if (grouped_load
|| slp_perm
)
7340 dr_chain
.quick_push (new_temp
);
7342 /* Store vector loads in the corresponding SLP_NODE. */
7343 if (slp
&& !slp_perm
)
7344 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7346 /* Bump the vector pointer to account for a gap or for excess
7347 elements loaded for a permuted SLP load. */
7348 if (group_gap_adj
!= 0)
7352 = wide_int_to_tree (sizetype
,
7353 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7354 group_gap_adj
, &ovf
));
7355 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7360 if (slp
&& !slp_perm
)
7365 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7366 slp_node_instance
, false))
7368 dr_chain
.release ();
7377 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7378 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7383 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7385 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7386 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7389 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7415 if (TREE_CODE (cond
) == SSA_NAME
7416 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7418 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7419 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7422 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7427 if (!COMPARISON_CLASS_P (cond
))
7430 lhs
= TREE_OPERAND (cond
, 0);
7431 rhs
= TREE_OPERAND (cond
, 1);
7433 if (TREE_CODE (lhs
) == SSA_NAME
)
7435 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7436 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7439 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7440 && TREE_CODE (lhs
) != FIXED_CST
)
7443 if (TREE_CODE (rhs
) == SSA_NAME
)
7445 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7446 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7449 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7450 && TREE_CODE (rhs
) != FIXED_CST
)
7453 if (vectype1
&& vectype2
7454 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7457 *comp_vectype
= vectype1
? vectype1
: vectype2
;
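
  /* Illustrative example (assumed, not from the original sources): for a
     scalar statement "x = a_1 < b_2 ? c_3 : d_4" the COND passed here is
     the tree "a_1 < b_2"; if both operands are simple SSA uses with the
     same number of vector subparts, *COMP_VECTYPE is set to their common
     vector type so the caller can pair it with a matching VEC_COND_EXPR.  */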
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7475 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7476 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7479 tree scalar_dest
= NULL_TREE
;
7480 tree vec_dest
= NULL_TREE
;
7481 tree cond_expr
, then_clause
, else_clause
;
7482 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7483 tree comp_vectype
= NULL_TREE
;
7484 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7485 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7488 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7489 enum vect_def_type dt
, dts
[4];
7491 enum tree_code code
;
7492 stmt_vec_info prev_stmt_info
= NULL
;
7494 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7495 vec
<tree
> vec_oprnds0
= vNULL
;
7496 vec
<tree
> vec_oprnds1
= vNULL
;
7497 vec
<tree
> vec_oprnds2
= vNULL
;
7498 vec
<tree
> vec_oprnds3
= vNULL
;
7500 bool masked
= false;
7502 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7505 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7507 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7510 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7511 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7515 /* FORNOW: not yet supported. */
7516 if (STMT_VINFO_LIVE_P (stmt_info
))
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7520 "value used after loop.\n");
7525 /* Is vectorizable conditional operation? */
7526 if (!is_gimple_assign (stmt
))
7529 code
= gimple_assign_rhs_code (stmt
);
7531 if (code
!= COND_EXPR
)
7534 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7535 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7536 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7541 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7543 gcc_assert (ncopies
>= 1);
7544 if (reduc_index
&& ncopies
> 1)
7545 return false; /* FORNOW */
7547 cond_expr
= gimple_assign_rhs1 (stmt
);
7548 then_clause
= gimple_assign_rhs2 (stmt
);
7549 else_clause
= gimple_assign_rhs3 (stmt
);
7551 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7556 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7559 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
,
7563 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7566 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
7569 masked
= !COMPARISON_CLASS_P (cond_expr
);
7570 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7572 if (vec_cmp_type
== NULL_TREE
)
7577 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7578 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7585 vec_oprnds0
.create (1);
7586 vec_oprnds1
.create (1);
7587 vec_oprnds2
.create (1);
7588 vec_oprnds3
.create (1);
7592 scalar_dest
= gimple_assign_lhs (stmt
);
7593 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7595 /* Handle cond expr. */
7596 for (j
= 0; j
< ncopies
; j
++)
7598 gassign
*new_stmt
= NULL
;
7603 auto_vec
<tree
, 4> ops
;
7604 auto_vec
<vec
<tree
>, 4> vec_defs
;
7607 ops
.safe_push (cond_expr
);
7610 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7611 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7613 ops
.safe_push (then_clause
);
7614 ops
.safe_push (else_clause
);
7615 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7616 vec_oprnds3
= vec_defs
.pop ();
7617 vec_oprnds2
= vec_defs
.pop ();
7619 vec_oprnds1
= vec_defs
.pop ();
7620 vec_oprnds0
= vec_defs
.pop ();
7623 vec_defs
.release ();
7631 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7633 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7639 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7640 stmt
, comp_vectype
);
7641 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7642 loop_vinfo
, >emp
, &dts
[0]);
7645 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7646 stmt
, comp_vectype
);
7647 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7648 loop_vinfo
, >emp
, &dts
[1]);
7650 if (reduc_index
== 1)
7651 vec_then_clause
= reduc_def
;
7654 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7656 vect_is_simple_use (then_clause
, loop_vinfo
,
7659 if (reduc_index
== 2)
7660 vec_else_clause
= reduc_def
;
7663 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7665 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
7672 = vect_get_vec_def_for_stmt_copy (dts
[0],
7673 vec_oprnds0
.pop ());
7676 = vect_get_vec_def_for_stmt_copy (dts
[1],
7677 vec_oprnds1
.pop ());
7679 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7680 vec_oprnds2
.pop ());
7681 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7682 vec_oprnds3
.pop ());
7687 vec_oprnds0
.quick_push (vec_cond_lhs
);
7689 vec_oprnds1
.quick_push (vec_cond_rhs
);
7690 vec_oprnds2
.quick_push (vec_then_clause
);
7691 vec_oprnds3
.quick_push (vec_else_clause
);
7694 /* Arguments are ready. Create the new vector stmt. */
7695 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7697 vec_then_clause
= vec_oprnds2
[i
];
7698 vec_else_clause
= vec_oprnds3
[i
];
7701 vec_compare
= vec_cond_lhs
;
7704 vec_cond_rhs
= vec_oprnds1
[i
];
7705 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7706 vec_cond_lhs
, vec_cond_rhs
);
7708 new_temp
= make_ssa_name (vec_dest
);
7709 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
7710 vec_compare
, vec_then_clause
,
7712 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7714 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7721 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7723 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7725 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7728 vec_oprnds0
.release ();
7729 vec_oprnds1
.release ();
7730 vec_oprnds2
.release ();
7731 vec_oprnds3
.release ();
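
  /* Reader's sketch (assumed example): a scalar statement

	 x_5 = a_1 < b_2 ? c_3 : d_4;

     is turned by the loop above into

	 vcmp_6 = va_1 < vb_2;
	 vx_7 = VEC_COND_EXPR <vcmp_6, vc_3, vd_4>;

     with one such pair per copy when ncopies > 1, or a single masked form
     when the condition is already a boolean SSA name.  */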
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7745 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7746 gimple
**vec_stmt
, tree reduc_def
,
7749 tree lhs
, rhs1
, rhs2
;
7750 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7751 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7752 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7753 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7755 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7756 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7759 enum tree_code code
;
7760 stmt_vec_info prev_stmt_info
= NULL
;
7762 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7763 vec
<tree
> vec_oprnds0
= vNULL
;
7764 vec
<tree
> vec_oprnds1
= vNULL
;
7769 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7772 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
7775 mask_type
= vectype
;
7776 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7781 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7783 gcc_assert (ncopies
>= 1);
7784 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7785 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7789 if (STMT_VINFO_LIVE_P (stmt_info
))
7791 if (dump_enabled_p ())
7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7793 "value used after loop.\n");
7797 if (!is_gimple_assign (stmt
))
7800 code
= gimple_assign_rhs_code (stmt
);
7802 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7805 rhs1
= gimple_assign_rhs1 (stmt
);
7806 rhs2
= gimple_assign_rhs2 (stmt
);
7808 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7809 &dts
[0], &vectype1
))
7812 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7813 &dts
[1], &vectype2
))
7816 if (vectype1
&& vectype2
7817 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7820 vectype
= vectype1
? vectype1
: vectype2
;
7822 /* Invariant comparison. */
7825 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
7826 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
7829 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
7834 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7835 vect_model_simple_cost (stmt_info
, ncopies
, dts
, NULL
, NULL
);
7836 return expand_vec_cmp_expr_p (vectype
, mask_type
);
7842 vec_oprnds0
.create (1);
7843 vec_oprnds1
.create (1);
7847 lhs
= gimple_assign_lhs (stmt
);
7848 mask
= vect_create_destination_var (lhs
, mask_type
);
7850 /* Handle cmp expr. */
7851 for (j
= 0; j
< ncopies
; j
++)
7853 gassign
*new_stmt
= NULL
;
7858 auto_vec
<tree
, 2> ops
;
7859 auto_vec
<vec
<tree
>, 2> vec_defs
;
7861 ops
.safe_push (rhs1
);
7862 ops
.safe_push (rhs2
);
7863 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7864 vec_oprnds1
= vec_defs
.pop ();
7865 vec_oprnds0
= vec_defs
.pop ();
7869 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
7870 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
7875 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
7876 vec_oprnds0
.pop ());
7877 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
7878 vec_oprnds1
.pop ());
7883 vec_oprnds0
.quick_push (vec_rhs1
);
7884 vec_oprnds1
.quick_push (vec_rhs2
);
7887 /* Arguments are ready. Create the new vector stmt. */
7888 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
7890 vec_rhs2
= vec_oprnds1
[i
];
7892 new_temp
= make_ssa_name (mask
);
7893 new_stmt
= gimple_build_assign (new_temp
, code
, vec_rhs1
, vec_rhs2
);
7894 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7896 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7903 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7905 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7907 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7910 vec_oprnds0
.release ();
7911 vec_oprnds1
.release ();
7916 /* Make sure the statement is vectorizable. */
7919 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
7921 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7922 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7923 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7925 tree scalar_type
, vectype
;
7926 gimple
*pattern_stmt
;
7927 gimple_seq pattern_def_seq
;
7929 if (dump_enabled_p ())
7931 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7932 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7935 if (gimple_has_volatile_ops (stmt
))
7937 if (dump_enabled_p ())
7938 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7939 "not vectorized: stmt has volatile operands\n");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; we don't analyze pattern stmts instead, as the pattern stmts
     will already be part of the SLP instance.  */
7958 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7959 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7960 && !STMT_VINFO_LIVE_P (stmt_info
))
7962 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7964 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7965 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7967 /* Analyze PATTERN_STMT instead of the original stmt. */
7968 stmt
= pattern_stmt
;
7969 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7970 if (dump_enabled_p ())
7972 dump_printf_loc (MSG_NOTE
, vect_location
,
7973 "==> examining pattern statement: ");
7974 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7979 if (dump_enabled_p ())
7980 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7985 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7988 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7989 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7991 /* Analyze PATTERN_STMT too. */
7992 if (dump_enabled_p ())
7994 dump_printf_loc (MSG_NOTE
, vect_location
,
7995 "==> examining pattern statement: ");
7996 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7999 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
8003 if (is_pattern_stmt_p (stmt_info
)
8005 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8007 gimple_stmt_iterator si
;
8009 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8011 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8012 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8013 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8015 /* Analyze def stmt of STMT if it's a pattern stmt. */
8016 if (dump_enabled_p ())
8018 dump_printf_loc (MSG_NOTE
, vect_location
,
8019 "==> examining pattern def statement: ");
8020 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8023 if (!vect_analyze_stmt (pattern_def_stmt
,
8024 need_to_vectorize
, node
))
8030 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8032 case vect_internal_def
:
8035 case vect_reduction_def
:
8036 case vect_nested_cycle
:
8037 gcc_assert (!bb_vinfo
8038 && (relevance
== vect_used_in_outer
8039 || relevance
== vect_used_in_outer_by_reduction
8040 || relevance
== vect_used_by_reduction
8041 || relevance
== vect_unused_in_scope
8042 || relevance
== vect_used_only_live
));
8045 case vect_induction_def
:
8046 case vect_constant_def
:
8047 case vect_external_def
:
8048 case vect_unknown_def_type
:
8055 gcc_assert (PURE_SLP_STMT (stmt_info
));
8057 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8058 if (dump_enabled_p ())
8060 dump_printf_loc (MSG_NOTE
, vect_location
,
8061 "get vectype for scalar type: ");
8062 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8063 dump_printf (MSG_NOTE
, "\n");
8066 vectype
= get_vectype_for_scalar_type (scalar_type
);
8069 if (dump_enabled_p ())
8071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8072 "not SLPed: unsupported data-type ");
8073 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8075 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8080 if (dump_enabled_p ())
8082 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8083 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8084 dump_printf (MSG_NOTE
, "\n");
8087 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8090 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8092 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8093 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8094 || (is_gimple_call (stmt
)
8095 && gimple_call_lhs (stmt
) == NULL_TREE
));
8096 *need_to_vectorize
= true;
8099 if (PURE_SLP_STMT (stmt_info
) && !node
)
8101 dump_printf_loc (MSG_NOTE
, vect_location
,
8102 "handled only by SLP analysis\n");
8108 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8109 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8110 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8111 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8112 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8113 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8114 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8115 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8116 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8117 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8118 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8119 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8120 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8124 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8125 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8126 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8127 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8128 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8129 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8130 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8131 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8132 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8133 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8138 if (dump_enabled_p ())
8140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8141 "not vectorized: relevant stmt not ");
8142 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8143 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
8154 if (STMT_VINFO_LIVE_P (stmt_info
)
8155 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8156 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
, -1, NULL
);
8160 if (dump_enabled_p ())
8162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8163 "not vectorized: live stmt not ");
8164 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8165 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */
8180 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8181 bool *grouped_store
, slp_tree slp_node
,
8182 slp_instance slp_node_instance
)
8184 bool is_store
= false;
8185 gimple
*vec_stmt
= NULL
;
8186 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8189 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8190 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8192 switch (STMT_VINFO_TYPE (stmt_info
))
8194 case type_demotion_vec_info_type
:
8195 case type_promotion_vec_info_type
:
8196 case type_conversion_vec_info_type
:
8197 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8201 case induc_vec_info_type
:
8202 gcc_assert (!slp_node
);
8203 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8207 case shift_vec_info_type
:
8208 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8212 case op_vec_info_type
:
8213 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8217 case assignment_vec_info_type
:
8218 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8222 case load_vec_info_type
:
8223 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8228 case store_vec_info_type
:
8229 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8231 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
8237 *grouped_store
= true;
8238 if (STMT_VINFO_VEC_STMT (stmt_info
))
8245 case condition_vec_info_type
:
8246 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8250 case comparison_vec_info_type
:
8251 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8255 case call_vec_info_type
:
8256 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8257 stmt
= gsi_stmt (*gsi
);
8258 if (is_gimple_call (stmt
)
8259 && gimple_call_internal_p (stmt
)
8260 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8264 case call_simd_clone_vec_info_type
:
8265 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8266 stmt
= gsi_stmt (*gsi
);
8269 case reduc_vec_info_type
:
8270 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8275 if (!STMT_VINFO_LIVE_P (stmt_info
))
8277 if (dump_enabled_p ())
8278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8279 "stmt not supported.\n");
8284 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8285 This would break hybrid SLP vectorization. */
8287 gcc_assert (!vec_stmt
8288 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8290 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8291 is being vectorized, but outside the immediately enclosing loop. */
8293 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8294 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8295 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8296 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8297 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8298 || STMT_VINFO_RELEVANT (stmt_info
) ==
8299 vect_used_in_outer_by_reduction
))
8301 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8302 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8303 imm_use_iterator imm_iter
;
8304 use_operand_p use_p
;
8308 if (dump_enabled_p ())
8309 dump_printf_loc (MSG_NOTE
, vect_location
,
8310 "Record the vdef for outer-loop vectorization.\n");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
8315 if (gimple_code (stmt
) == GIMPLE_PHI
)
8316 scalar_dest
= PHI_RESULT (stmt
);
8318 scalar_dest
= gimple_assign_lhs (stmt
);
8320 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8322 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8324 exit_phi
= USE_STMT (use_p
);
8325 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8330 /* Handle stmts whose DEF is used outside the loop-nest that is
8331 being vectorized. */
8336 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8338 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8339 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8340 && STMT_VINFO_TYPE (slp_stmt_info
) != reduc_vec_info_type
)
8342 done
= vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8348 else if (STMT_VINFO_LIVE_P (stmt_info
)
8349 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8351 done
= vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, &vec_stmt
);
8356 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a hash table for stmt_vec_info. */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info. */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
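
/* As an illustration of get_vectype_for_scalar_type_and_size and its wrapper
   get_vectype_for_scalar_type below (a sketch, not a normative example; the
   concrete numbers depend on the target): on a target whose preferred SIMD
   mode for SImode is V4SImode (e.g. x86_64 with SSE2), a 32-bit 'int' scalar
   type yields a "vector(4) int":

     tree v = get_vectype_for_scalar_type (integer_type_node);
     // TYPE_VECTOR_SUBPARTS (v) == 4, GET_MODE_SIZE (TYPE_MODE (v)) == 16,
     // and current_vector_size is latched to 16 by the first such call.
*/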
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   use in the stmt.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || (TYPE_VECTOR_SUBPARTS (vectype) / 2
		== TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type
	    = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
				       current_vector_size);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
		    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
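
/* Worked example for the interface above (illustrative; the exact vector
   sizes and optabs depend on the target): on a little-endian target with
   16-byte vectors, widening a vector(16) char to int is a two-step
   char->short->int conversion.  A successful call for a CONVERT_EXPR with
   VECTYPE_IN = vector(16) char and VECTYPE_OUT = vector(4) int would return
   true with *CODE1/*CODE2 set to VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   *MULTI_STEP_CVT set to 1, and INTERM_TYPES holding the single intermediate
   type vector(8) short.  */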
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
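
/* Worked example (illustrative; target dependent): on a 16-byte-vector
   target, narrowing vector(4) int to vector(16) char is a two-step
   int->short->char conversion.  A successful call for a CONVERT_EXPR would
   set *CODE1 to VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT to 1, and push the
   intermediate type vector(8) short onto INTERM_TYPES.  */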
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || (TYPE_VECTOR_SUBPARTS (vectype) * 2
		== TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type
	    = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
				       current_vector_size);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
		    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();